Compare commits
147 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 397d2a1588 | |||
| 65567d0d83 | |||
| 0abd984285 | |||
| ec21c9bb29 | |||
| cb2ef9d0e7 | |||
| da79dde611 | |||
| 935ea1bf9f | |||
| 11e752ac01 | |||
| 03472072b8 | |||
| 63e6f3ef91 | |||
| a00bb349c4 | |||
| 78c7bc16b0 | |||
| 1f98f31f83 | |||
| 6d508cf53f | |||
| 591dcfb139 | |||
| 4881056528 | |||
| 6da60d1287 | |||
| baafab50c5 | |||
| 9b5b9ad3a2 | |||
| 1b4c55af65 | |||
| 01607f8614 | |||
| d27cf3545b | |||
| 144bd5fdf9 | |||
| c617a686d6 | |||
| 09ff51c5ae | |||
| 5716d227b1 | |||
| 67ccbb46fd | |||
| 6d5ca5ec9d | |||
| fde5b39d53 | |||
| de9264baf7 | |||
| 305c7dc851 | |||
| 10f9574bcd | |||
| a0afa7ab6f | |||
| 4655f68e87 | |||
| 677c28aeca | |||
| 1f065d67bb | |||
| fe70910755 | |||
| fd6f236a5c | |||
| 200bdf990f | |||
| 3e5cc86c5a | |||
| 3e3e68fd3a | |||
| fd6ae98222 | |||
| b4ac0cda43 | |||
| a41f271c58 | |||
| be72627aeb | |||
| ef92b07448 | |||
| 5b301f9354 | |||
| 2e297b430e | |||
| 7bc6ad9823 | |||
| 6ccdf45179 | |||
| 69483786aa | |||
| 1f5ab16b18 | |||
| a8d04cded4 | |||
| 8308beb5bb | |||
| b9633e5b1a | |||
| d55807947e | |||
| d9fd0a147e | |||
| 03593d4304 | |||
| 87355c3efb | |||
| f92d148881 | |||
| 50c520e1ff | |||
| 8380cb7946 | |||
| 6d8ab54f46 | |||
| e19c240a79 | |||
| 5c38bc3bfe | |||
| b5687aece8 | |||
| cdb6ebdb6a | |||
| bb85f1a56e | |||
| 44c4d89011 | |||
| eaccbcdcf1 | |||
| 4e3cff0729 | |||
| 09c819d424 | |||
| 29b55bfd01 | |||
| 4092bdfb1a | |||
| 743dca2fb3 | |||
| 92bba64772 | |||
| 7d14635a72 | |||
| 58aa217428 | |||
| a05dba49f7 | |||
| 3efe86e29e | |||
| c0320c35f0 | |||
| 0f4a1b268b | |||
| 3eb4749b4d | |||
| 983ab56662 | |||
| 90bdb8c329 | |||
| d185e317df | |||
| 72cda5877a | |||
| 963821a681 | |||
| 2385ab7996 | |||
| 6c10c33572 | |||
| a4622d5e9a | |||
| 41d5f2d2ea | |||
| 52af81537d | |||
| 4f90be9311 | |||
| d613d98c72 | |||
| bd381b3ffd | |||
| 8054719956 | |||
| 4049dc8c7f | |||
| 7bf20fce85 | |||
| 8028c14356 | |||
| cfa6674ac1 | |||
| 667a30870d | |||
| 8768a7b3ef | |||
| 1e56e35dcc | |||
| 95165fe972 | |||
| e78017ed8c | |||
| e078a686bf | |||
| f0db02d8ef | |||
| 373346a0ba | |||
| df1aaa37f8 | |||
| 9b0ff37973 | |||
| b227502cef | |||
| 43a03c168c | |||
| 8f37e16892 | |||
| 14235656cc | |||
| 0d18a5d467 | |||
| f48520c86a | |||
| 956230aec1 | |||
| ff20b33b75 | |||
| b47f56d60a | |||
| e9e9c6c8fc | |||
| ee75f149ae | |||
| 2f65dd1a61 | |||
| 28bef63569 | |||
| ed989d81fd | |||
| d4fd46155e | |||
| 5407fabe1d | |||
| ed41d21eac | |||
| 5854d4406d | |||
| 20de13e48e | |||
| 8af4e42f44 | |||
| 762c523d59 | |||
| 12e6150219 | |||
| a93e9f677c | |||
| d5f63dc082 | |||
| 5d98e373e3 | |||
| d881403d11 | |||
| 690765b53e | |||
| aa183efdca | |||
| f5fed74d6f | |||
| d7a4d40d47 | |||
| 9a12ee18b2 | |||
| b0549e6f05 | |||
| a579a84c7f | |||
| 7450fcfb07 | |||
| 07275bf92f | |||
| e06ea310a8 |
@@ -4,6 +4,7 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- v2-dev
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
@@ -18,19 +19,37 @@ jobs:
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.22'
|
||||
go-version: '1.25'
|
||||
|
||||
- name: Go Build
|
||||
run: |
|
||||
go build ./cmd/server/...
|
||||
go build ./cmd/agent/...
|
||||
go build ./cmd/mcp-server/...
|
||||
go build ./cmd/cli/...
|
||||
|
||||
- name: Go Vet
|
||||
run: go vet ./...
|
||||
|
||||
- name: Install golangci-lint
|
||||
run: |
|
||||
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v2.11.4
|
||||
|
||||
- name: Run golangci-lint
|
||||
run: golangci-lint run ./... --timeout 5m
|
||||
|
||||
- name: Install govulncheck
|
||||
run: go install golang.org/x/vuln/cmd/govulncheck@latest
|
||||
|
||||
- name: Run govulncheck
|
||||
run: govulncheck ./...
|
||||
|
||||
- name: Race Detection
|
||||
run: go test -race ./internal/service/... ./internal/api/handler/... ./internal/api/middleware/... ./internal/scheduler/... ./internal/connector/... ./internal/domain/... ./internal/validation/... -count=1 -timeout 300s
|
||||
|
||||
- name: Go Test with Coverage
|
||||
run: |
|
||||
go test ./internal/service/... ./internal/api/handler/... ./internal/integration/... ./internal/connector/issuer/local/... -count=1 -cover -coverprofile=coverage.out
|
||||
go test ./internal/service/... ./internal/api/handler/... ./internal/api/middleware/... ./internal/integration/... ./internal/connector/issuer/... ./internal/connector/target/... ./internal/connector/notifier/... ./internal/mcp/... ./internal/cli/... ./internal/domain/... ./internal/validation/... -count=1 -cover -coverprofile=coverage.out
|
||||
|
||||
- name: Check Coverage Thresholds
|
||||
run: |
|
||||
@@ -38,7 +57,7 @@ jobs:
|
||||
echo "=== Coverage Report ==="
|
||||
go tool cover -func=coverage.out | tail -1
|
||||
|
||||
# Check service layer coverage (target: 70%+)
|
||||
# Check service layer coverage (target: 60%+)
|
||||
SERVICE_COV=$(go tool cover -func=coverage.out | grep 'internal/service' | awk '{print $NF}' | sed 's/%//' | awk '{sum+=$1; n++} END {if(n>0) printf "%.1f", sum/n; else print "0"}')
|
||||
echo "Service layer coverage: ${SERVICE_COV}%"
|
||||
|
||||
@@ -46,13 +65,29 @@ jobs:
|
||||
HANDLER_COV=$(go tool cover -func=coverage.out | grep 'internal/api/handler' | awk '{print $NF}' | sed 's/%//' | awk '{sum+=$1; n++} END {if(n>0) printf "%.1f", sum/n; else print "0"}')
|
||||
echo "Handler layer coverage: ${HANDLER_COV}%"
|
||||
|
||||
# Check domain layer coverage (target: 40%+)
|
||||
DOMAIN_COV=$(go tool cover -func=coverage.out | grep 'internal/domain' | awk '{print $NF}' | sed 's/%//' | awk '{sum+=$1; n++} END {if(n>0) printf "%.1f", sum/n; else print "0"}')
|
||||
echo "Domain layer coverage: ${DOMAIN_COV}%"
|
||||
|
||||
# Check middleware layer coverage (target: 50%+)
|
||||
MIDDLEWARE_COV=$(go tool cover -func=coverage.out | grep 'internal/api/middleware' | awk '{print $NF}' | sed 's/%//' | awk '{sum+=$1; n++} END {if(n>0) printf "%.1f", sum/n; else print "0"}')
|
||||
echo "Middleware layer coverage: ${MIDDLEWARE_COV}%"
|
||||
|
||||
# Fail if thresholds not met
|
||||
if [ "$(echo "$SERVICE_COV < 30" | bc -l)" -eq 1 ]; then
|
||||
echo "::error::Service layer coverage ${SERVICE_COV}% is below 30% threshold"
|
||||
if [ "$(echo "$SERVICE_COV < 55" | bc -l)" -eq 1 ]; then
|
||||
echo "::error::Service layer coverage ${SERVICE_COV}% is below 55% threshold"
|
||||
exit 1
|
||||
fi
|
||||
if [ "$(echo "$HANDLER_COV < 50" | bc -l)" -eq 1 ]; then
|
||||
echo "::error::Handler layer coverage ${HANDLER_COV}% is below 50% threshold"
|
||||
if [ "$(echo "$HANDLER_COV < 60" | bc -l)" -eq 1 ]; then
|
||||
echo "::error::Handler layer coverage ${HANDLER_COV}% is below 60% threshold"
|
||||
exit 1
|
||||
fi
|
||||
if [ "$(echo "$DOMAIN_COV < 40" | bc -l)" -eq 1 ]; then
|
||||
echo "::error::Domain layer coverage ${DOMAIN_COV}% is below 40% threshold"
|
||||
exit 1
|
||||
fi
|
||||
if [ "$(echo "$MIDDLEWARE_COV < 30" | bc -l)" -eq 1 ]; then
|
||||
echo "::error::Middleware layer coverage ${MIDDLEWARE_COV}% is below 30% threshold"
|
||||
exit 1
|
||||
fi
|
||||
echo "Coverage thresholds passed!"
|
||||
@@ -90,3 +125,20 @@ jobs:
|
||||
- name: Build Frontend
|
||||
working-directory: web
|
||||
run: npx vite build
|
||||
|
||||
helm-lint:
|
||||
name: Helm Chart Validation
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install Helm
|
||||
uses: azure/setup-helm@v4
|
||||
with:
|
||||
version: '3.13.0'
|
||||
|
||||
- name: Lint Helm Chart
|
||||
run: helm lint deploy/helm/certctl/
|
||||
|
||||
- name: Template Helm Chart
|
||||
run: helm template certctl deploy/helm/certctl/ > /dev/null
|
||||
|
||||
@@ -65,8 +65,8 @@ jobs:
|
||||
## Docker Images
|
||||
|
||||
```bash
|
||||
docker pull ghcr.io/shankar0123/certctl-server:${{ steps.version.outputs.VERSION }}
|
||||
docker pull ghcr.io/shankar0123/certctl-agent:${{ steps.version.outputs.VERSION }}
|
||||
docker pull shankar0123.docker.scarf.sh/certctl-server:${{ steps.version.outputs.VERSION }}
|
||||
docker pull shankar0123.docker.scarf.sh/certctl-agent:${{ steps.version.outputs.VERSION }}
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
@@ -43,6 +43,11 @@ vendor/
|
||||
tmp/
|
||||
temp/
|
||||
*.log
|
||||
*.bak
|
||||
|
||||
# Private keys (agent-generated, never commit)
|
||||
cmd/agent/*.key
|
||||
cmd/agent/*.pem
|
||||
|
||||
# Database
|
||||
*.db
|
||||
@@ -54,9 +59,15 @@ temp/
|
||||
# Build artifacts
|
||||
certctl-server
|
||||
certctl-agent
|
||||
certctl-cli
|
||||
/server
|
||||
/agent
|
||||
|
||||
# Private strategy docs
|
||||
roadmap.md
|
||||
SECURITY_REMEDIATION.md
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
mcp-server
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
version: "2"
|
||||
|
||||
run:
|
||||
timeout: 5m
|
||||
|
||||
linters:
|
||||
default: none
|
||||
enable:
|
||||
- govet
|
||||
- staticcheck
|
||||
- unused
|
||||
settings:
|
||||
staticcheck:
|
||||
checks:
|
||||
- "all"
|
||||
- "-ST1005" # error strings should not be capitalized (pre-existing style)
|
||||
- "-ST1000" # package comment style (pre-existing)
|
||||
- "-ST1003" # naming convention (pre-existing)
|
||||
- "-ST1016" # method receiver naming (pre-existing)
|
||||
- "-QF1001" # apply De Morgan's law (style suggestion)
|
||||
- "-QF1003" # convert if/else to switch (style suggestion)
|
||||
- "-QF1012" # use fmt.Fprintf (style suggestion)
|
||||
- "-SA1019" # deprecated API usage (elliptic.Marshal — Go hasn't removed it)
|
||||
- "-SA9003" # empty branch (intentional in switch stubs)
|
||||
- "-S1009" # redundant nil check (pre-existing style)
|
||||
- "-S1011" # use single append with spread (pre-existing style)
|
||||
exclusions:
|
||||
max-issues-per-linter: 0
|
||||
max-same-issues: 0
|
||||
|
||||
# Linters temporarily disabled — re-enable incrementally as pre-existing issues are fixed:
|
||||
# - errcheck (50 issues — unchecked error returns throughout codebase)
|
||||
# - gocritic (50 issues — diagnostic/performance suggestions)
|
||||
# - gosec (23 issues — security warnings in test/stub code)
|
||||
# - ineffassign (13 issues — dead assignments)
|
||||
# - noctx (25 issues — http.Get without context)
|
||||
# - bodyclose (response body close missing)
|
||||
@@ -12,7 +12,7 @@ COPY web/ .
|
||||
RUN npm run build
|
||||
|
||||
# Stage 2: Build Go binary
|
||||
FROM golang:1.22-alpine AS builder
|
||||
FROM golang:1.25-alpine AS builder
|
||||
|
||||
RUN apk add --no-cache git ca-certificates tzdata
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Multi-stage build for certctl agent
|
||||
# Stage 1: Build
|
||||
FROM golang:1.22-alpine AS builder
|
||||
FROM golang:1.25-alpine AS builder
|
||||
|
||||
RUN apk add --no-cache git ca-certificates
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ Change Date: March 14, 2033
|
||||
Change License: Apache License, Version 2.0
|
||||
|
||||
For information about alternative licensing arrangements for the Licensed Work,
|
||||
please contact: skreddy040@gmail.com
|
||||
please contact: certctl@proton.me
|
||||
|
||||
Notice
|
||||
|
||||
|
||||
@@ -1,194 +0,0 @@
|
||||
# PostgreSQL Repository Implementation
|
||||
|
||||
## Overview
|
||||
Complete PostgreSQL implementation for the certctl certificate control plane using `database/sql` and `lib/pq` driver. All 71 interface methods across 11 repositories have been implemented.
|
||||
|
||||
## File Structure
|
||||
|
||||
```
|
||||
internal/repository/postgres/
|
||||
├── db.go # Database connection and migration setup
|
||||
├── certificate.go # CertificateRepository (8 methods)
|
||||
├── issuer.go # IssuerRepository (5 methods)
|
||||
├── target.go # TargetRepository (6 methods)
|
||||
├── agent.go # AgentRepository (7 methods)
|
||||
├── job.go # JobRepository (9 methods)
|
||||
├── policy.go # PolicyRepository (7 methods)
|
||||
├── audit.go # AuditRepository (2 methods)
|
||||
├── notification.go # NotificationRepository (3 methods)
|
||||
├── team.go # TeamRepository (5 methods)
|
||||
└── owner.go # OwnerRepository (5 methods)
|
||||
```
|
||||
|
||||
## Key Implementation Details
|
||||
|
||||
### Database Connection (db.go)
|
||||
- `NewDB(connStr string)` - Opens PostgreSQL connection with connection pooling
|
||||
- Max open connections: 25
|
||||
- Max idle connections: 5
|
||||
- Verifies connection with Ping()
|
||||
|
||||
- `RunMigrations(db, migrationsPath)` - Executes SQL migration files
|
||||
- Reads all `.sql` files from migrations directory
|
||||
- Executes files in alphabetical order
|
||||
- Simple approach without external migration library
|
||||
|
||||
### Data Patterns Used
|
||||
|
||||
1. **UUID Generation**: Using `github.com/google/uuid` for ID generation
|
||||
2. **Parameterized Queries**: All queries use `$1, $2, etc.` parameter placeholders
|
||||
3. **Context Propagation**: All database operations use `*Context` variants
|
||||
4. **Nullable Types**:
|
||||
- `sql.NullTime` for optional timestamps
|
||||
- `sql.NullString` for optional strings
|
||||
5. **JSON Handling**:
|
||||
- `json.Marshal/Unmarshal` for JSONB columns
|
||||
- Config fields stored as `json.RawMessage`
|
||||
6. **Array Handling**:
|
||||
- `pq.Array()` for storing Go slices in PostgreSQL arrays
|
||||
- `pq.StringArray` for scanning string arrays
|
||||
7. **RETURNING Clauses**: Used in CREATE operations to retrieve generated IDs
|
||||
|
||||
### Error Handling
|
||||
- All errors wrapped with `fmt.Errorf` for context
|
||||
- Specific error messages for not found cases
|
||||
- Row count verification for UPDATE/DELETE operations
|
||||
|
||||
## Repository Implementations
|
||||
|
||||
### CertificateRepository (8 methods)
|
||||
- Manages certificate lifecycle with filtering by status, environment, owner, team, issuer
|
||||
- Pagination support (default 50, max 500 per page)
|
||||
- Certificate versioning with history tracking
|
||||
- Expiration tracking and notifications
|
||||
- Tags stored as JSON
|
||||
|
||||
### IssuerRepository (5 methods)
|
||||
- Manages certificate authorities (ACME, GenericCA)
|
||||
- Configuration stored as JSON for flexibility
|
||||
- Enable/disable issuers
|
||||
|
||||
### TargetRepository (6 methods)
|
||||
- Manages deployment targets (NGINX, F5, IIS)
|
||||
- Lists targets associated with certificates via join table
|
||||
- Configuration stored as JSON
|
||||
|
||||
### AgentRepository (7 methods)
|
||||
- Manages control plane agents with status tracking
|
||||
- Heartbeat update functionality
|
||||
- API key hash lookup for authentication
|
||||
- Last heartbeat timestamp tracking
|
||||
|
||||
### JobRepository (9 methods)
|
||||
- Manages renewal, deployment, issuance, and validation jobs
|
||||
- Status tracking with error messages
|
||||
- Attempt counters for retry logic
|
||||
- Pending job retrieval by type
|
||||
- Filtering by status and certificate
|
||||
|
||||
### PolicyRepository (7 methods)
|
||||
- Policy rules with multiple enforcement types
|
||||
- Policy violation recording and querying
|
||||
- Configurable rules stored as JSON
|
||||
- Severity levels for violations (Warning, Error, Critical)
|
||||
|
||||
### AuditRepository (2 methods)
|
||||
- Records all control plane actions
|
||||
- Filtering by actor, resource type, time range
|
||||
- Pagination support
|
||||
- Details stored as JSON
|
||||
|
||||
### NotificationRepository (3 methods)
|
||||
- Notification event tracking
|
||||
- Multiple channels (Email, Webhook, Slack)
|
||||
- Delivery status tracking
|
||||
- Certificate-specific notification filtering
|
||||
|
||||
### TeamRepository (5 methods)
|
||||
- Organizational unit management
|
||||
- Basic CRUD operations
|
||||
- Team descriptions for organization
|
||||
|
||||
### OwnerRepository (5 methods)
|
||||
- Certificate owner management
|
||||
- Email field for notifications
|
||||
- Team affiliation tracking
|
||||
- Basic CRUD operations
|
||||
|
||||
## Database Assumptions
|
||||
|
||||
The implementation expects the following table structures:
|
||||
|
||||
**certificates**
|
||||
- id, name, common_name, sans (array), environment, owner_id, team_id, issuer_id
|
||||
- status, expires_at, tags (json), last_renewal_at, last_deployment_at
|
||||
- created_at, updated_at
|
||||
|
||||
**certificate_versions**
|
||||
- id, certificate_id, serial_number, not_before, not_after
|
||||
- fingerprint_sha256, pem_chain, csr_pem, created_at
|
||||
|
||||
**certificate_target_mappings** (join table)
|
||||
- certificate_id, target_id
|
||||
|
||||
**issuers**
|
||||
- id, name, type, config (json), enabled, created_at, updated_at
|
||||
|
||||
**deployment_targets**
|
||||
- id, name, type, agent_id, config (json), enabled, created_at, updated_at
|
||||
|
||||
**agents**
|
||||
- id, name, hostname, status, last_heartbeat_at, registered_at, api_key_hash
|
||||
|
||||
**jobs**
|
||||
- id, type, certificate_id, target_id, status, attempts, max_attempts
|
||||
- last_error, scheduled_at, started_at, completed_at, created_at
|
||||
|
||||
**policy_rules**
|
||||
- id, name, type, config (json), enabled, created_at, updated_at
|
||||
|
||||
**policy_violations**
|
||||
- id, certificate_id, rule_id, message, severity, created_at
|
||||
|
||||
**audit_events**
|
||||
- id, actor, actor_type, action, resource_type, resource_id, details (json), timestamp
|
||||
|
||||
**notifications**
|
||||
- id, type, certificate_id, channel, recipient, message, sent_at, status, error, created_at
|
||||
|
||||
**teams**
|
||||
- id, name, description, created_at, updated_at
|
||||
|
||||
**owners**
|
||||
- id, name, email, team_id, created_at, updated_at
|
||||
|
||||
## Integration Points
|
||||
|
||||
Constructor functions for each repository:
|
||||
```go
|
||||
NewCertificateRepository(db *sql.DB) *CertificateRepository
|
||||
NewIssuerRepository(db *sql.DB) *IssuerRepository
|
||||
NewTargetRepository(db *sql.DB) *TargetRepository
|
||||
NewAgentRepository(db *sql.DB) *AgentRepository
|
||||
NewJobRepository(db *sql.DB) *JobRepository
|
||||
NewPolicyRepository(db *sql.DB) *PolicyRepository
|
||||
NewAuditRepository(db *sql.DB) *AuditRepository
|
||||
NewNotificationRepository(db *sql.DB) *NotificationRepository
|
||||
NewTeamRepository(db *sql.DB) *TeamRepository
|
||||
NewOwnerRepository(db *sql.DB) *OwnerRepository
|
||||
```
|
||||
|
||||
## Dependencies
|
||||
- `database/sql` (stdlib)
|
||||
- `github.com/lib/pq` v1.10.9
|
||||
- `github.com/google/uuid` v1.6.0
|
||||
|
||||
## Notes
|
||||
|
||||
1. All list operations support pagination with configurable page size (default 50, max 500)
|
||||
2. Filtering is dynamic - only conditions with non-empty values are added to WHERE clause
|
||||
3. Timestamps use `time.Time` for CreatedAt/UpdatedAt with automatic Now() on updates
|
||||
4. Array fields use `pq.Array()` for proper PostgreSQL array handling
|
||||
5. Nullable fields use `sql.Null*` types for proper NULL handling
|
||||
6. All operations are context-aware and respect cancellation signals
|
||||
7. Error messages are descriptive and wrapped for debugging
|
||||
@@ -1,272 +0,0 @@
|
||||
# PostgreSQL Implementation Patterns
|
||||
|
||||
## Consistent Patterns Across All Repositories
|
||||
|
||||
### 1. Package Structure
|
||||
```go
|
||||
package postgres
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"github.com/google/uuid"
|
||||
"github.com/lib/pq"
|
||||
)
|
||||
```
|
||||
|
||||
### 2. Repository Constructor Pattern
|
||||
```go
|
||||
type CertificateRepository struct {
|
||||
db *sql.DB
|
||||
}
|
||||
|
||||
func NewCertificateRepository(db *sql.DB) *CertificateRepository {
|
||||
return &CertificateRepository{db: db}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. UUID Generation Pattern
|
||||
```go
|
||||
if cert.ID == "" {
|
||||
cert.ID = uuid.New().String()
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Parameterized Queries Pattern
|
||||
All queries use `$1, $2, $3...` placeholders:
|
||||
```go
|
||||
err := r.db.QueryRowContext(ctx, `
|
||||
SELECT id, name FROM table WHERE id = $1
|
||||
`, id).Scan(&result.ID, &result.Name)
|
||||
```
|
||||
|
||||
### 5. Context Propagation Pattern
|
||||
```go
|
||||
// QueryContext for SELECT
|
||||
rows, err := r.db.QueryContext(ctx, query, args...)
|
||||
|
||||
// QueryRowContext for single row
|
||||
row := r.db.QueryRowContext(ctx, query, args...)
|
||||
|
||||
// ExecContext for INSERT/UPDATE/DELETE
|
||||
result, err := r.db.ExecContext(ctx, query, args...)
|
||||
```
|
||||
|
||||
### 6. NULL Handling Pattern
|
||||
```go
|
||||
// For nullable columns, scan into a pointer field (or a sql.Null* wrapper)
|
||||
var agent.LastHeartbeatAt *time.Time
|
||||
|
||||
// Scan handles NULL automatically
|
||||
err := row.Scan(&agent.LastHeartbeatAt)
|
||||
```
|
||||
|
||||
### 7. Array Handling Pattern (pq)
|
||||
```go
|
||||
import "github.com/lib/pq"
|
||||
|
||||
// Storing arrays
|
||||
pq.Array(cert.SANs) // Converts []string to PostgreSQL array
|
||||
|
||||
// Scanning arrays
|
||||
var sans pq.StringArray
|
||||
row.Scan(&sans)
|
||||
cert.SANs = []string(sans)
|
||||
```
|
||||
|
||||
### 8. JSON Handling Pattern
|
||||
```go
|
||||
import "encoding/json"
|
||||
|
||||
// For JSONB config columns (stored as json.RawMessage)
|
||||
issuer.Config // type: json.RawMessage
|
||||
|
||||
// For tags (stored as JSON string)
|
||||
tagsJSON, err := json.Marshal(cert.Tags)
|
||||
row.Scan(&tagsJSON)
|
||||
json.Unmarshal(tagsJSON, &cert.Tags)
|
||||
```
|
||||
|
||||
### 9. Pagination Pattern
|
||||
```go
|
||||
// Set defaults
|
||||
if filter.Page < 1 {
|
||||
filter.Page = 1
|
||||
}
|
||||
if filter.PerPage == 0 || filter.PerPage > 500 {
|
||||
filter.PerPage = 50
|
||||
}
|
||||
|
||||
// Calculate offset
|
||||
offset := (filter.Page - 1) * filter.PerPage
|
||||
|
||||
// Add to query
|
||||
query += fmt.Sprintf("LIMIT $%d OFFSET $%d", argCount, argCount+1)
|
||||
args = append(args, filter.PerPage, offset)
|
||||
```
|
||||
|
||||
### 10. Dynamic WHERE Clause Pattern
|
||||
```go
|
||||
var whereConditions []string
|
||||
var args []interface{}
|
||||
argCount := 1
|
||||
|
||||
if filter.Status != "" {
|
||||
whereConditions = append(whereConditions, fmt.Sprintf("status = $%d", argCount))
|
||||
args = append(args, filter.Status)
|
||||
argCount++
|
||||
}
|
||||
|
||||
whereClause := ""
|
||||
if len(whereConditions) > 0 {
|
||||
whereClause = "WHERE " + strings.Join(whereConditions, " AND ")
|
||||
}
|
||||
```
|
||||
|
||||
### 11. Row Count Verification Pattern
|
||||
```go
|
||||
result, err := r.db.ExecContext(ctx, query, args...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update: %w", err)
|
||||
}
|
||||
|
||||
rows, err := result.RowsAffected()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get rows affected: %w", err)
|
||||
}
|
||||
|
||||
if rows == 0 {
|
||||
return fmt.Errorf("entity not found")
|
||||
}
|
||||
```
|
||||
|
||||
### 12. Not Found Error Pattern
|
||||
```go
|
||||
row := r.db.QueryRowContext(ctx, query, args...)
|
||||
entity, err := scanEntity(row)
|
||||
if err != nil {
|
||||
if err == sql.ErrNoRows {
|
||||
return nil, fmt.Errorf("entity not found")
|
||||
}
|
||||
return nil, fmt.Errorf("failed to query entity: %w", err)
|
||||
}
|
||||
```
|
||||
|
||||
### 13. Scanner Helper Pattern (for reusable scanning)
|
||||
```go
|
||||
func scanEntity(scanner interface {
|
||||
Scan(...interface{}) error
|
||||
}) (*domain.Entity, error) {
|
||||
var e domain.Entity
|
||||
err := scanner.Scan(&e.ID, &e.Name, ...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to scan entity: %w", err)
|
||||
}
|
||||
return &e, nil
|
||||
}
|
||||
|
||||
// Used in both single row and multiple rows contexts
|
||||
row := r.db.QueryRowContext(ctx, query)
|
||||
entity, err := scanEntity(row)
|
||||
|
||||
for rows.Next() {
|
||||
entity, err := scanEntity(rows)
|
||||
}
|
||||
```
|
||||
|
||||
### 14. List Query Pattern
|
||||
```go
|
||||
// Get total count first
|
||||
countQuery := fmt.Sprintf("SELECT COUNT(*) FROM table %s", whereClause)
|
||||
var total int
|
||||
r.db.QueryRowContext(ctx, countQuery, args...).Scan(&total)
|
||||
|
||||
// Then get paginated results
|
||||
rows, err := r.db.QueryContext(ctx, paginatedQuery, args...)
|
||||
defer rows.Close()
|
||||
|
||||
var results []*domain.Entity
|
||||
for rows.Next() {
|
||||
entity, err := scanEntity(rows)
|
||||
results = append(results, entity)
|
||||
}
|
||||
```
|
||||
|
||||
### 15. Error Wrapping Pattern
|
||||
```go
|
||||
// All errors wrapped with context
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create entity: %w", err)
|
||||
}
|
||||
```
|
||||
|
||||
### 16. RETURNING Clause Pattern (for retrieving generated IDs)
|
||||
```go
|
||||
err := r.db.QueryRowContext(ctx, `
|
||||
INSERT INTO table (col1, col2)
|
||||
VALUES ($1, $2)
|
||||
RETURNING id
|
||||
`, val1, val2).Scan(&entity.ID)
|
||||
```
|
||||
|
||||
### 17. Join Table Pattern (for many-to-many)
|
||||
```go
|
||||
// ListByCertificate uses certificate_target_mappings join table
|
||||
rows, err := r.db.QueryContext(ctx, `
|
||||
SELECT dt.id, dt.name, dt.type, dt.agent_id, dt.config, dt.enabled, dt.created_at, dt.updated_at
|
||||
FROM deployment_targets dt
|
||||
INNER JOIN certificate_target_mappings ctm ON dt.id = ctm.target_id
|
||||
WHERE ctm.certificate_id = $1
|
||||
ORDER BY dt.created_at DESC
|
||||
`, certID)
|
||||
```
|
||||
|
||||
## Type-Specific Patterns
|
||||
|
||||
### Certificate with Arrays and JSON
|
||||
```go
|
||||
// In certificate.go
|
||||
var sans pq.StringArray
|
||||
var tagsJSON []byte
|
||||
|
||||
err := scanner.Scan(&cert.ID, &cert.Name, &cert.CommonName, &sans, ...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to scan: %w", err)
|
||||
}
|
||||
|
||||
cert.SANs = []string(sans)
|
||||
json.Unmarshal(tagsJSON, &cert.Tags)
|
||||
```
|
||||
|
||||
### Agent with Nullable Timestamp
|
||||
```go
|
||||
// In agent.go
|
||||
var agent domain.Agent
|
||||
err := scanner.Scan(&agent.ID, &agent.Name, &agent.Hostname, &agent.Status,
|
||||
&agent.LastHeartbeatAt, &agent.RegisteredAt, &agent.APIKeyHash)
|
||||
// LastHeartbeatAt is a *time.Time; Scan leaves it nil when the column is NULL
|
||||
```
|
||||
|
||||
### Job with Nullable String
|
||||
```go
|
||||
// In job.go
|
||||
var job domain.Job
|
||||
var lastError *string
|
||||
err := scanner.Scan(&job.ID, ..., &lastError, ...)
|
||||
// lastError can be nil for successful jobs
|
||||
job.LastError = lastError
|
||||
```
|
||||
|
||||
## Testing Considerations
|
||||
|
||||
These implementations expect:
|
||||
1. PostgreSQL database with proper schema
|
||||
2. Tables created with matching column names and types
|
||||
3. Foreign key relationships established
|
||||
4. Proper indexes on frequently queried columns
|
||||
|
||||
For testing, consider:
|
||||
- Using `testcontainers-go` for PostgreSQL in Docker
|
||||
- Running migrations before test suite
|
||||
- Using transactions with rollback for test isolation
|
||||
@@ -1,51 +1,155 @@
|
||||
<p align="center">
|
||||
<img src="docs/screenshots/logo/certctl-logo.png" alt="certctl logo" width="450">
|
||||
</p>
|
||||
|
||||
<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=89db181e-76e0-45cc-b9c0-790c3dfdfc73" />
|
||||
<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=b9379aff-9e5c-4d01-8f2d-9e4ffa09d126" />
|
||||
|
||||
# certctl — Self-Hosted Certificate Lifecycle Platform
|
||||
|
||||
A self-hosted certificate lifecycle platform. Track, renew, and deploy TLS certificates across your infrastructure with a web dashboard, REST API, and agent-based architecture where private keys never leave your servers.
|
||||
```mermaid
|
||||
timeline
|
||||
title TLS Certificate Maximum Lifespan (CA/Browser Forum Ballot SC-081v3)
|
||||
2015 : 5 years
|
||||
2018 : 825 days
|
||||
2020 : 398 days
|
||||
March 2026 : 200 days
|
||||
March 2027 : 100 days
|
||||
March 2029 : 47 days
|
||||
```
|
||||
|
||||
TLS certificate lifespans are shrinking fast. The CA/Browser Forum passed [Ballot SC-081v3](https://cabforum.org/2025/04/11/ballot-sc081v3-introduce-schedule-of-reducing-validity-and-data-reuse-periods/) unanimously in April 2025, setting a phased reduction: **200 days** by March 2026, **100 days** by March 2027, and **47 days** by March 2029. Organizations managing dozens or hundreds of certificates can no longer rely on spreadsheets, calendar reminders, or manual renewal workflows. The math doesn't work — at 47-day lifespans, a team managing 100 certificates is processing 7+ renewals per week, every week, forever.
|
||||
|
||||
certctl is a self-hosted platform that automates the entire certificate lifecycle — from issuance through renewal to deployment — with zero human intervention. It works with any certificate authority, deploys to any server, and keeps private keys on your infrastructure where they belong.
|
||||
|
||||
[License](LICENSE)
|
||||
[Go Report Card](https://goreportcard.com/report/github.com/shankar0123/certctl)
|
||||

|
||||
[Releases](https://github.com/shankar0123/certctl/releases)
|
||||
|
||||
## Documentation
|
||||
|
||||
| Guide | Description |
|
||||
|-------|-------------|
|
||||
| [Why certctl?](docs/why-certctl.md) | Competitive positioning — how certctl compares to open-source and enterprise certificate management platforms |
|
||||
| [Concepts](docs/concepts.md) | TLS certificates explained from scratch — for beginners who know nothing about certs |
|
||||
| [Quick Start](docs/quickstart.md) | Get running in 5 minutes — dashboard, API, CLI, discovery, stakeholder demo flow |
|
||||
| [Advanced Demo](docs/demo-advanced.md) | Issue a certificate end-to-end with technical deep-dives |
|
||||
| [Architecture](docs/architecture.md) | System design, data flow diagrams, security model |
|
||||
| [Feature Inventory](docs/features.md) | Complete reference of all V2 capabilities, API endpoints, and configuration |
|
||||
| [Connectors](docs/connectors.md) | Build custom issuer, target, and notifier connectors |
|
||||
| [Compliance Mapping](docs/compliance.md) | SOC 2 Type II, PCI-DSS 4.0, NIST SP 800-57 alignment guides |
|
||||
|
||||
> **Next release:** v2.1.0 will be tagged after the full V2 feature suite passes manual QA across all 34 sections of the [testing guide](docs/testing-guide.md). Automated CI (1,471 Go tests + 193 frontend tests) gates every commit; the manual playbook covers integration, deployment, and UX verification that unit tests can't reach.
|
||||
|
||||
## Why certctl Exists
|
||||
|
||||
Certificate lifecycle tooling today falls into two camps: expensive enterprise platforms (Venafi, Keyfactor, Sectigo) that cost six figures and take months to deploy, or single-purpose tools (cert-manager, certbot) that handle one slice of the problem. If you run a mixed infrastructure — some NGINX, some Apache, a few HAProxy nodes, maybe an F5 — and you need to manage certificates from multiple CAs, there's nothing self-hosted that covers the full lifecycle without vendor lock-in.
|
||||
|
||||
certctl fills that gap. It's **CA-agnostic** — the issuer connector interface means you can plug in any certificate authority: a self-signed local CA for dev, Let's Encrypt via ACME for public certs, Smallstep step-ca for your private PKI, your enterprise ADCS via sub-CA mode, or any custom CA through a shell script adapter. You're never locked to a single CA vendor, and you can run multiple issuers simultaneously for different certificate types.
|
||||
|
||||
It's also **target-agnostic**. Agents deploy certificates to NGINX, Apache, HAProxy, Traefik, and Caddy — all using the same pluggable connector model for any server that accepts cert files. The control plane never initiates outbound connections — agents poll for work, which means certctl works behind firewalls, across network zones, and in air-gapped environments.
|
||||
|
||||
For a detailed comparison with CertKit, KeyTalk, and enterprise platforms (Venafi, Keyfactor), see [Why certctl?](docs/why-certctl.md).
|
||||
|
||||
## What It Does
|
||||
|
||||
certctl gives you a single pane of glass for every TLS certificate in your organization. The **web dashboard** shows your full certificate inventory — what's healthy, what's expiring, what's already expired, and who owns each one. The **REST API** (55 endpoints) lets you automate everything. **Agents** deployed on your infrastructure generate private keys locally and submit CSRs — private keys never leave your servers.
|
||||
certctl gives you a single pane of glass for every TLS certificate in your organization:
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph "Control Plane"
|
||||
API["REST API + Dashboard\n:8443"]
|
||||
PG[("PostgreSQL")]
|
||||
end
|
||||
- **Web dashboard** — 22 operational pages: certificate inventory, deployment timeline with TLS verification, bulk operations (renew/revoke/reassign), discovery triage, network scan management, approval workflows, audit trail with CSV/JSON export, agent fleet overview with OS/arch grouping, short-lived credential monitoring, digest email preview
|
||||
- **REST API** — 99 endpoints under `/api/v1/` + `/.well-known/est/` for complete automation, with sparse fields, sort, cursor pagination, and time-range filters
|
||||
- **Agents** — generate private keys locally (ECDSA P-256), discover existing certs on disk (PEM/DER), submit CSRs only (private keys never leave your servers)
|
||||
- **Network scanner** — discovers certificates on TLS endpoints across CIDR ranges without requiring agents, concurrent scanning with configurable timeouts
|
||||
- **Certificate export** — PEM (JSON or file download) and PKCS#12 formats, with audit trail; private keys never included
|
||||
- **S/MIME + EKU support** — issue certificates with emailProtection, codeSigning, timeStamping, clientAuth EKUs; email SAN routing for S/MIME
|
||||
- **EST server** (RFC 7030) — device and WiFi certificate enrollment via industry-standard protocol
|
||||
- **Post-deployment verification** — agent-side TLS probe confirms the target serves the correct certificate by SHA-256 fingerprint match
|
||||
- **Approval workflows** — require human sign-off on renewals before deployment
|
||||
- **Background scheduler** — 7 automated loops: renewal checks, job processing, agent health, notifications, short-lived cert expiry, network scanning, and scheduled certificate digest emails
|
||||
- **ACME Renewal Information (ARI, RFC 9773)** — CA-directed renewal timing; certctl asks the CA when to renew instead of using fixed thresholds
|
||||
- **Scheduled certificate digest emails** — HTML digest with certificate stats, expiration timeline, and job health; optional daily briefing via SMTP
|
||||
- **Helm chart** — Production-ready Kubernetes deployment with server, PostgreSQL, and agent DaemonSet
|
||||
|
||||
subgraph "Your Infrastructure"
|
||||
A1["Agent"] --> T1["NGINX"]
|
||||
A2["Agent"] --> T2["F5 BIG-IP"]
|
||||
A3["Agent"] --> T3["IIS"]
|
||||
end
|
||||
For the full capability breakdown — revocation infrastructure, policy engine, observability, EST enrollment, and more — see the [Feature Inventory](docs/features.md).
|
||||
|
||||
API --> PG
|
||||
A1 & A2 & A3 -->|"CSR + status\n(no private keys)"| API
|
||||
API -->|"Signed certs"| A1 & A2 & A3
|
||||
API -->|"Issue/Renew"| CA["Certificate Authorities\nLocal CA · ACME"]
|
||||
```
|
||||
## Supported Integrations
|
||||
|
||||
### Certificate Issuers
|
||||
| Issuer | Status | Type |
|
||||
|--------|--------|------|
|
||||
| Local CA (self-signed + sub-CA) | Implemented | `GenericCA` |
|
||||
| ACME v2 (Let's Encrypt, Sectigo) | Implemented (HTTP-01 + DNS-01 + DNS-PERSIST-01) | `ACME` |
|
||||
| ACME EAB (ZeroSSL, Google Trust Services) | Implemented (auto-fetch EAB from ZeroSSL) | `ACME` |
|
||||
| step-ca | Implemented | `StepCA` |
|
||||
| OpenSSL / Custom CA | Implemented | `OpenSSL` |
|
||||
| Vault PKI | Future | — |
|
||||
| DigiCert | Future | — |
|
||||
|
||||
**Note:** ADCS integration is handled via the Local CA's sub-CA mode — certctl operates as a subordinate CA with its signing certificate issued by ADCS. Any CA with a shell-accessible signing interface can be integrated today via the OpenSSL/Custom CA connector.
|
||||
|
||||
### Deployment Targets
|
||||
| Target | Status | Type |
|
||||
|--------|--------|------|
|
||||
| NGINX | Implemented | `NGINX` |
|
||||
| Apache httpd | Implemented | `Apache` |
|
||||
| HAProxy | Implemented | `HAProxy` |
|
||||
| Traefik | Implemented | `Traefik` |
|
||||
| Caddy | Implemented | `Caddy` |
|
||||
| F5 BIG-IP | Interface only | `F5` |
|
||||
| Microsoft IIS | Interface only | `IIS` |
|
||||
|
||||
### Notifiers
|
||||
| Notifier | Status | Type |
|
||||
|----------|--------|------|
|
||||
| Email (SMTP) | Implemented | `Email` |
|
||||
| Webhooks | Implemented | `Webhook` |
|
||||
| Slack | Implemented | `Slack` |
|
||||
| Microsoft Teams | Implemented | `Teams` |
|
||||
| PagerDuty | Implemented | `PagerDuty` |
|
||||
| OpsGenie | Implemented | `OpsGenie` |
|
||||
|
||||
All connectors are pluggable — build your own by implementing the [connector interface](docs/connectors.md).
|
||||
|
||||
### Screenshots
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|  |  |
|
||||
| **Dashboard** — certificate stats, expiry timeline, recent jobs | **Certificates** — full inventory with status, environment, owner filters |
|
||||
|  |  |
|
||||
| **Agents** — fleet health, hostname, heartbeat tracking | **Jobs** — issuance, renewal, deployment job queue |
|
||||
|  |  |
|
||||
| **Notifications** — threshold alerts grouped by certificate | **Policies** — enforcement rules with enable/disable and delete |
|
||||
|  |  |
|
||||
| **Issuers** — CA connectors with test connectivity | **Targets** — deployment targets (NGINX, F5, IIS) |
|
||||
|  | |
|
||||
| **Audit Trail** — immutable log of every action | |
|
||||
<table>
|
||||
<tr>
|
||||
<td><a href="docs/screenshots/v2-dashboard.png"><img src="docs/screenshots/v2-dashboard.png" width="270" alt="Dashboard"></a><br><b>Dashboard</b><br><sub>Stats, expiration heatmap, renewal trends</sub></td>
|
||||
<td><a href="docs/screenshots/v2-certificates.png"><img src="docs/screenshots/v2-certificates.png" width="270" alt="Certificates"></a><br><b>Certificates</b><br><sub>Inventory with status, owner, team filters</sub></td>
|
||||
<td><a href="docs/screenshots/v2-agents.png"><img src="docs/screenshots/v2-agents.png" width="270" alt="Agents"></a><br><b>Agents</b><br><sub>Fleet health, OS/arch, IP, version</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="docs/screenshots/v2-fleet.png"><img src="docs/screenshots/v2-fleet.png" width="270" alt="Fleet Overview"></a><br><b>Fleet Overview</b><br><sub>OS distribution, status breakdown</sub></td>
|
||||
<td><a href="docs/screenshots/v2-jobs.png"><img src="docs/screenshots/v2-jobs.png" width="270" alt="Jobs"></a><br><b>Jobs</b><br><sub>Issuance, renewal, deployment queue</sub></td>
|
||||
<td><a href="docs/screenshots/v2-notifications.png"><img src="docs/screenshots/v2-notifications.png" width="270" alt="Notifications"></a><br><b>Notifications</b><br><sub>Expiration warnings, renewal results</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="docs/screenshots/v2-policies.png"><img src="docs/screenshots/v2-policies.png" width="270" alt="Policies"></a><br><b>Policies</b><br><sub>Ownership, lifetime, renewal rules</sub></td>
|
||||
<td><a href="docs/screenshots/v2-profiles.png"><img src="docs/screenshots/v2-profiles.png" width="270" alt="Profiles"></a><br><b>Profiles</b><br><sub>Key types, max TTL, crypto constraints</sub></td>
|
||||
<td><a href="docs/screenshots/v2-issuers.png"><img src="docs/screenshots/v2-issuers.png" width="270" alt="Issuers"></a><br><b>Issuers</b><br><sub>Local CA, ACME, step-ca connectors</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="docs/screenshots/v2-targets.png"><img src="docs/screenshots/v2-targets.png" width="270" alt="Targets"></a><br><b>Targets</b><br><sub>NGINX, Apache, HAProxy, Traefik, Caddy deployment</sub></td>
|
||||
<td><a href="docs/screenshots/v2-owners.png"><img src="docs/screenshots/v2-owners.png" width="270" alt="Owners"></a><br><b>Owners</b><br><sub>Cert ownership with team assignment</sub></td>
|
||||
<td><a href="docs/screenshots/v2-teams.png"><img src="docs/screenshots/v2-teams.png" width="270" alt="Teams"></a><br><b>Teams</b><br><sub>Org grouping for notification routing</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="docs/screenshots/v2-agent-groups.png"><img src="docs/screenshots/v2-agent-groups.png" width="270" alt="Agent Groups"></a><br><b>Agent Groups</b><br><sub>Dynamic grouping by OS, arch, CIDR</sub></td>
|
||||
<td><a href="docs/screenshots/v2-audit-trail.png"><img src="docs/screenshots/v2-audit-trail.png" width="270" alt="Audit Trail"></a><br><b>Audit Trail</b><br><sub>Immutable log, CSV/JSON export</sub></td>
|
||||
<td><a href="docs/screenshots/v2-short-lived.png"><img src="docs/screenshots/v2-short-lived.png" width="270" alt="Short-Lived"></a><br><b>Short-Lived Creds</b><br><sub>Ephemeral certs with live TTL countdown</sub></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
> **22 operational GUI pages** covering the full certificate lifecycle: dashboard, certificates (list + detail with EKU badges, deployment timeline, TLS verification status), agents, fleet overview, jobs (with approval workflow), notifications, policies, profiles, issuers, targets (wizard with NGINX/Apache/HAProxy/Traefik/Caddy/F5/IIS), owners, teams, agent groups, audit trail, short-lived credentials, discovery triage, and network scan management.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Docker Pull
|
||||
|
||||
```bash
|
||||
docker pull shankar0123.docker.scarf.sh/certctl-server
|
||||
docker pull shankar0123.docker.scarf.sh/certctl-agent
|
||||
```
|
||||
|
||||
### Docker Compose (Recommended)
|
||||
|
||||
```bash
|
||||
@@ -56,7 +160,7 @@ docker compose -f deploy/docker-compose.yml up -d --build
|
||||
|
||||
Wait ~30 seconds, then open **http://localhost:8443** in your browser.
|
||||
|
||||
The dashboard comes pre-loaded with 14 demo certificates, 5 agents, policy rules, audit events, and notifications — a realistic snapshot of a certificate inventory so you can explore immediately.
|
||||
The dashboard comes pre-loaded with 15 demo certificates, 5 agents, policy rules, audit events, and notifications — a realistic snapshot of a certificate inventory so you can explore immediately.
|
||||
|
||||
Verify the API:
|
||||
```bash
|
||||
@@ -64,13 +168,13 @@ curl http://localhost:8443/health
|
||||
# {"status":"healthy"}
|
||||
|
||||
curl -s http://localhost:8443/api/v1/certificates | jq '.total'
|
||||
# 14
|
||||
# 15
|
||||
```
|
||||
|
||||
### Manual Build
|
||||
|
||||
```bash
|
||||
# Prerequisites: Go 1.22+, PostgreSQL 16+
|
||||
# Prerequisites: Go 1.25+, PostgreSQL 16+
|
||||
go mod download
|
||||
make build
|
||||
|
||||
@@ -90,43 +194,9 @@ export CERTCTL_AGENT_ID=agent-local-01
|
||||
./bin/agent --agent-id=agent-local-01
|
||||
```
|
||||
|
||||
## Documentation
|
||||
|
||||
| Guide | Description |
|
||||
|-------|-------------|
|
||||
| [Concepts](docs/concepts.md) | TLS certificates explained from scratch — for beginners who know nothing about certs |
|
||||
| [Quick Start](docs/quickstart.md) | Get running in 5 minutes with accurate API examples |
|
||||
| [Demo Walkthrough](docs/demo-guide.md) | 5-7 minute guided stakeholder presentation |
|
||||
| [Advanced Demo](docs/demo-advanced.md) | Issue a certificate end-to-end with technical deep-dives |
|
||||
| [Architecture](docs/architecture.md) | System design, data flow diagrams, security model |
|
||||
| [Connectors](docs/connectors.md) | Build custom issuer, target, and notifier connectors |
|
||||
|
||||
## Architecture
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph "Control Plane (certctl-server)"
|
||||
DASH["Web Dashboard\nReact SPA"]
|
||||
API["REST API\nGo 1.22 net/http"]
|
||||
SVC["Service Layer"]
|
||||
REPO["Repository Layer\ndatabase/sql + lib/pq"]
|
||||
SCHED["Scheduler\nRenewal · Jobs · Health · Notifications"]
|
||||
end
|
||||
|
||||
subgraph "Data Store"
|
||||
PG[("PostgreSQL 16\n14 tables · TEXT primary keys")]
|
||||
end
|
||||
|
||||
subgraph "Agents"
|
||||
AG["certctl-agent\nKey generation · CSR · Deployment"]
|
||||
end
|
||||
|
||||
DASH --> API
|
||||
API --> SVC --> REPO --> PG
|
||||
SCHED --> SVC
|
||||
AG -->|"Heartbeat + CSR"| API
|
||||
API -->|"Cert + Chain"| AG
|
||||
```
|
||||
**Control plane** (Go 1.25 net/http) → **PostgreSQL 16** (21 tables, TEXT primary keys) → **Agents** (key generation, CSR submission, cert deployment). Background scheduler runs 7 loops: renewal checks (1h), job processing (30s), agent health (2m), notifications (1m), short-lived cert expiry (30s), network scanning (6h), and scheduled certificate digest emails. See [Architecture Guide](docs/architecture.md) for full system diagrams and data flow.
|
||||
|
||||
### Key Design Decisions
|
||||
|
||||
@@ -135,169 +205,90 @@ flowchart TB
|
||||
- **Handler → Service → Repository layering.** Handlers define their own service interfaces for clean dependency inversion. No global service singletons.
|
||||
- **Idempotent migrations.** All schema uses `IF NOT EXISTS` and seed data uses `ON CONFLICT (id) DO NOTHING`, safe for repeated execution.
|
||||
|
||||
### Database Schema
|
||||
|
||||
| Table | Purpose |
|
||||
|-------|---------|
|
||||
| `managed_certificates` | Certificate records with metadata, status, expiry, tags |
|
||||
| `certificate_versions` | Historical versions with PEM chains and CSRs |
|
||||
| `renewal_policies` | Renewal window, auto-renew settings, retry config, alert thresholds |
|
||||
| `issuers` | CA configurations (Local CA, ACME, etc.) |
|
||||
| `deployment_targets` | Target systems (NGINX, F5, IIS) with agent assignments |
|
||||
| `agents` | Registered agents with heartbeat tracking |
|
||||
| `jobs` | Issuance, renewal, deployment, and validation jobs |
|
||||
| `teams` | Organizational groups for certificate ownership |
|
||||
| `owners` | Individual owners with email for notifications |
|
||||
| `policy_rules` | Enforcement rules (allowed issuers, environments, metadata) |
|
||||
| `policy_violations` | Flagged non-compliance with severity levels |
|
||||
| `audit_events` | Immutable action log (append-only, no update/delete) |
|
||||
| `notification_events` | Email and webhook notification records |
|
||||
| `certificate_target_mappings` | Many-to-many cert ↔ target relationships |
|
||||
PostgreSQL 16 with 21 tables covering certificates, versions, policies, issuers, targets, agents, jobs, teams, owners, profiles, agent groups, revocations, discovery, network scans, and audit events. See the [Architecture Guide](docs/architecture.md) for the full schema.
|
||||
|
||||
## Configuration
|
||||
|
||||
All server environment variables use the `CERTCTL_` prefix:
|
||||
All environment variables use the `CERTCTL_` prefix. Full reference below (39 variables across server, agent, and connector config).
|
||||
|
||||
### Server — Core
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `CERTCTL_SERVER_HOST` | `127.0.0.1` | Server bind address |
|
||||
| `CERTCTL_SERVER_PORT` | `8080` | Server listen port |
|
||||
| `CERTCTL_DATABASE_URL` | `postgres://localhost/certctl` | PostgreSQL connection string |
|
||||
| `CERTCTL_DATABASE_MAX_CONNS` | `25` | Connection pool size |
|
||||
| `CERTCTL_LOG_LEVEL` | `info` | Log level: `debug`, `info`, `warn`, `error` |
|
||||
| `CERTCTL_LOG_FORMAT` | `json` | Log format: `json` or `text` |
|
||||
| `CERTCTL_AUTH_TYPE` | `api-key` | Auth mode: `api-key`, `jwt`, or `none` |
|
||||
| `CERTCTL_AUTH_SECRET` | — | Required for `api-key` and `jwt` auth types |
|
||||
| `CERTCTL_KEYGEN_MODE` | `agent` | Key generation mode: `agent` (production) or `server` (demo only) |
|
||||
| `CERTCTL_ACME_DIRECTORY_URL` | — | ACME directory URL (e.g., Let's Encrypt staging) |
|
||||
| `CERTCTL_ACME_EMAIL` | — | Contact email for ACME account registration |
|
||||
| `CERTCTL_SERVER_PORT` | `8080` | Server listen port (1–65535) |
|
||||
| `CERTCTL_DATABASE_URL` | `postgres://localhost/certctl` | PostgreSQL connection string (required) |
|
||||
| `CERTCTL_DATABASE_MAX_CONNS` | `25` | PostgreSQL connection pool size (min 1) |
|
||||
| `CERTCTL_DATABASE_MIGRATIONS_PATH` | `./migrations` | Path to migration SQL files |
|
||||
| `CERTCTL_MAX_BODY_SIZE` | `1048576` | Max HTTP request body in bytes (default 1MB) |
|
||||
| `CERTCTL_LOG_LEVEL` | `info` | Log verbosity: `debug`, `info`, `warn`, `error` |
|
||||
| `CERTCTL_LOG_FORMAT` | `json` | Log format: `json` (structured) or `text` (human-readable) |
|
||||
|
||||
Agent environment variables:
|
||||
### Server — Auth, CORS, Rate Limiting
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `CERTCTL_AUTH_TYPE` | `api-key` | Auth mode: `api-key`, `jwt`, or `none` (demo only) |
|
||||
| `CERTCTL_AUTH_SECRET` | — | Required for `api-key` and `jwt` auth types |
|
||||
| `CERTCTL_CORS_ORIGINS` | *(empty = deny all)* | Comma-separated allowed origins, or `*` for dev |
|
||||
| `CERTCTL_RATE_LIMIT_ENABLED` | `true` | Enable token bucket rate limiting |
|
||||
| `CERTCTL_RATE_LIMIT_RPS` | `50` | Requests per second per client |
|
||||
| `CERTCTL_RATE_LIMIT_BURST` | `100` | Max burst size |
|
||||
| `CERTCTL_KEYGEN_MODE` | `agent` | Key generation: `agent` (production) or `server` (demo only) |
|
||||
|
||||
### Server — Scheduler
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `CERTCTL_SCHEDULER_RENEWAL_CHECK_INTERVAL` | `1h` | How often to check expiring certs (min 1m) |
|
||||
| `CERTCTL_SCHEDULER_JOB_PROCESSOR_INTERVAL` | `30s` | How often to process pending jobs (min 1s) |
|
||||
| `CERTCTL_SCHEDULER_AGENT_HEALTH_CHECK_INTERVAL` | `2m` | Agent heartbeat check frequency (min 1s) |
|
||||
| `CERTCTL_SCHEDULER_NOTIFICATION_PROCESS_INTERVAL` | `1m` | Notification send frequency (min 1s) |
|
||||
|
||||
### Server — Sub-CA Mode
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `CERTCTL_CA_CERT_PATH` | — | PEM-encoded CA certificate for sub-CA mode |
|
||||
| `CERTCTL_CA_KEY_PATH` | — | PEM-encoded CA private key (RSA, ECDSA, PKCS#8) |
|
||||
|
||||
### Server — Feature Flags
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `CERTCTL_EST_ENABLED` | `false` | Enable RFC 7030 EST enrollment endpoints |
|
||||
| `CERTCTL_EST_ISSUER_ID` | `iss-local` | Which issuer processes EST enrollments |
|
||||
| `CERTCTL_EST_PROFILE_ID` | — | Constrain EST to a specific certificate profile |
|
||||
| `CERTCTL_NETWORK_SCAN_ENABLED` | `false` | Enable server-side TLS network scanning |
|
||||
| `CERTCTL_NETWORK_SCAN_INTERVAL` | `6h` | How often scheduled scans run |
|
||||
| `CERTCTL_VERIFY_DEPLOYMENT` | `true` | TLS verification after certificate deployment |
|
||||
| `CERTCTL_VERIFY_TIMEOUT` | `10s` | TLS probe timeout |
|
||||
| `CERTCTL_VERIFY_DELAY` | `2s` | Delay before verification probe |
|
||||
|
||||
### Server — Notification Connectors
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `CERTCTL_SLACK_WEBHOOK_URL` | — | Slack incoming webhook URL (enables Slack) |
|
||||
| `CERTCTL_SLACK_CHANNEL` | — | Override default webhook channel |
|
||||
| `CERTCTL_SLACK_USERNAME` | `certctl` | Bot display name |
|
||||
| `CERTCTL_TEAMS_WEBHOOK_URL` | — | Microsoft Teams webhook URL (enables Teams) |
|
||||
| `CERTCTL_PAGERDUTY_ROUTING_KEY` | — | PagerDuty Events API v2 key (enables PagerDuty) |
|
||||
| `CERTCTL_PAGERDUTY_SEVERITY` | `warning` | Event severity: `info`, `warning`, `error`, `critical` |
|
||||
| `CERTCTL_OPSGENIE_API_KEY` | — | OpsGenie Alert API key (enables OpsGenie) |
|
||||
| `CERTCTL_OPSGENIE_PRIORITY` | `P3` | Alert priority: `P1`–`P5` |
|
||||
|
||||
### Agent
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `CERTCTL_SERVER_URL` | `http://localhost:8080` | Control plane URL |
|
||||
| `CERTCTL_API_KEY` | — | Agent API key |
|
||||
| `CERTCTL_AGENT_NAME` | `certctl-agent` | Agent display name |
|
||||
| `CERTCTL_API_KEY` | — | Agent API key for authentication |
|
||||
| `CERTCTL_AGENT_ID` | — | Registered agent ID (required) |
|
||||
| `CERTCTL_KEY_DIR` | `/var/lib/certctl/keys` | Directory for storing private keys (agent keygen mode) |
|
||||
| `CERTCTL_KEY_DIR` | `/var/lib/certctl/keys` | Private key storage directory (0600 perms) |
|
||||
| `CERTCTL_DISCOVERY_DIRS` | — | Directories to scan for existing certs (comma-separated) |
|
||||
|
||||
Docker Compose overrides these for the demo stack (see `deploy/docker-compose.yml`): port `8443`, auth type `none`, database pointing to the postgres container.
|
||||
|
||||
## API Overview
|
||||
|
||||
All endpoints are under `/api/v1/` and return JSON. List endpoints support pagination (`?page=1&per_page=50`).
|
||||
|
||||
### Certificates
|
||||
```
|
||||
GET /api/v1/certificates List (filter: status, environment, owner_id, team_id)
|
||||
POST /api/v1/certificates Create
|
||||
GET /api/v1/certificates/{id} Get
|
||||
PUT /api/v1/certificates/{id} Update
|
||||
DELETE /api/v1/certificates/{id} Archive (soft delete)
|
||||
GET /api/v1/certificates/{id}/versions Version history
|
||||
POST /api/v1/certificates/{id}/renew Trigger renewal → 202 Accepted
|
||||
POST /api/v1/certificates/{id}/deploy Trigger deployment → 202 Accepted
|
||||
```
|
||||
|
||||
### Agents
|
||||
```
|
||||
GET /api/v1/agents List
|
||||
POST /api/v1/agents Register
|
||||
GET /api/v1/agents/{id} Get
|
||||
POST /api/v1/agents/{id}/heartbeat Record heartbeat
|
||||
POST /api/v1/agents/{id}/csr Submit CSR for issuance
|
||||
GET /api/v1/agents/{id}/certificates/{certId} Retrieve signed certificate
|
||||
GET /api/v1/agents/{id}/work Poll for pending deployment jobs
|
||||
POST /api/v1/agents/{id}/jobs/{jobId}/status Report job completion/failure
|
||||
```
|
||||
|
||||
### Infrastructure
|
||||
```
|
||||
GET /api/v1/issuers List issuers
|
||||
POST /api/v1/issuers Create
|
||||
GET /api/v1/issuers/{id} Get
|
||||
PUT /api/v1/issuers/{id} Update
|
||||
DELETE /api/v1/issuers/{id} Delete
|
||||
POST /api/v1/issuers/{id}/test Test connectivity
|
||||
|
||||
GET /api/v1/targets List deployment targets
|
||||
POST /api/v1/targets Create
|
||||
GET /api/v1/targets/{id} Get
|
||||
PUT /api/v1/targets/{id} Update
|
||||
DELETE /api/v1/targets/{id} Delete
|
||||
```
|
||||
|
||||
### Organization
|
||||
```
|
||||
GET /api/v1/teams List teams
|
||||
POST /api/v1/teams Create
|
||||
GET /api/v1/teams/{id} Get
|
||||
PUT /api/v1/teams/{id} Update
|
||||
DELETE /api/v1/teams/{id} Delete
|
||||
GET /api/v1/owners List owners
|
||||
POST /api/v1/owners Create
|
||||
GET /api/v1/owners/{id} Get
|
||||
PUT /api/v1/owners/{id} Update
|
||||
DELETE /api/v1/owners/{id} Delete
|
||||
```
|
||||
|
||||
### Operations
|
||||
```
|
||||
GET /api/v1/jobs List (filter: status, type)
|
||||
GET /api/v1/jobs/{id} Get
|
||||
POST /api/v1/jobs/{id}/cancel Cancel
|
||||
|
||||
GET /api/v1/policies List policy rules
|
||||
POST /api/v1/policies Create
|
||||
PUT /api/v1/policies/{id} Update (enable/disable)
|
||||
DELETE /api/v1/policies/{id} Delete
|
||||
GET /api/v1/policies/{id}/violations List violations for rule
|
||||
|
||||
GET /api/v1/audit Query audit trail
|
||||
GET /api/v1/notifications List notifications
|
||||
POST /api/v1/notifications/{id}/read Mark as read
|
||||
```
|
||||
|
||||
### Auth
|
||||
```
|
||||
GET /api/v1/auth/info Auth mode info (no auth required)
|
||||
GET /api/v1/auth/check Validate credentials
|
||||
```
|
||||
|
||||
### Health
|
||||
```
|
||||
GET /health Server health check
|
||||
GET /ready Readiness check
|
||||
```
|
||||
|
||||
## Supported Integrations
|
||||
|
||||
### Certificate Issuers
|
||||
| Issuer | Status | Type |
|
||||
|--------|--------|------|
|
||||
| Local CA (self-signed) | Implemented | `GenericCA` |
|
||||
| ACME v2 (Let's Encrypt, Sectigo) | Implemented (HTTP-01) | `ACME` |
|
||||
| step-ca | Planned (V2) | — |
|
||||
| OpenSSL / Custom CA | Planned (V2) | — |
|
||||
| ADCS (Active Directory CS) | Planned (V2) | — |
|
||||
| Vault PKI | Planned | — |
|
||||
| DigiCert | Planned | — |
|
||||
|
||||
### Deployment Targets
|
||||
| Target | Status | Type |
|
||||
|--------|--------|------|
|
||||
| NGINX | Implemented | `NGINX` |
|
||||
| F5 BIG-IP | Interface only (V2) | `F5` |
|
||||
| Microsoft IIS | Interface only (V2) | `IIS` |
|
||||
| Kubernetes Secrets | Planned | — |
|
||||
|
||||
### Notifiers
|
||||
| Notifier | Status | Type |
|
||||
|----------|--------|------|
|
||||
| Email (SMTP) | Implemented | `Email` |
|
||||
| Webhooks | Implemented | `Webhook` |
|
||||
| Slack | Planned | — |
|
||||
Docker Compose overrides for the demo stack are in `deploy/docker-compose.yml`.
|
||||
|
||||
## Development
|
||||
|
||||
@@ -308,16 +299,26 @@ make install-tools
|
||||
# Run tests
|
||||
make test
|
||||
|
||||
# Run tests with race detection (same as CI)
|
||||
go test -race ./internal/service/... ./internal/api/handler/... ./internal/api/middleware/... ./internal/scheduler/... ./internal/connector/... ./internal/domain/... ./internal/validation/...
|
||||
|
||||
# Run with coverage
|
||||
make test-coverage
|
||||
|
||||
# Lint
|
||||
# Lint (runs golangci-lint with project config)
|
||||
make lint
|
||||
|
||||
# Vulnerability scan
|
||||
govulncheck ./...
|
||||
|
||||
# Format
|
||||
make fmt
|
||||
```
|
||||
|
||||
### CI Pipeline
|
||||
|
||||
Every push and PR runs: `go vet`, `go test -race` (race detection), `golangci-lint` (11 linters including gosec and bodyclose), `govulncheck` (dependency CVE scanning), and per-layer coverage thresholds (service 60%, handler 60%, domain 40%, middleware 50%). Frontend CI runs TypeScript type checking, Vitest tests, and Vite production build. See `.github/workflows/ci.yml` for details.
|
||||
|
||||
### Docker Compose
|
||||
|
||||
```bash
|
||||
@@ -339,31 +340,177 @@ make docker-clean # Stop + remove volumes
|
||||
- API key and JWT auth types supported; `none` for demo/development
|
||||
- Auth type and secret configured via `CERTCTL_AUTH_TYPE` and `CERTCTL_AUTH_SECRET`
|
||||
|
||||
### CORS
|
||||
- **Deny-by-default**: Empty `CERTCTL_CORS_ORIGINS` blocks all cross-origin requests. Operators must explicitly list allowed origins (comma-separated) or set `*` for development.
|
||||
|
||||
### Input Validation
|
||||
- Shell command injection prevention on all connector scripts (strict character whitelist, no metacharacters)
|
||||
- RFC 1123 domain name validation, base64url ACME token validation
|
||||
- SSRF protection in network scanner (loopback, link-local, multicast, broadcast ranges filtered)
|
||||
|
||||
### Concurrency Safety
|
||||
- Scheduler loops protected by `sync/atomic.Bool` idempotency guards — duplicate ticks are skipped
|
||||
- Graceful shutdown waits up to 30 seconds for in-flight work before database close
|
||||
|
||||
### Audit Trail
|
||||
- Immutable append-only log in PostgreSQL (`audit_events` table)
|
||||
- Every action attributed to an actor with timestamp and resource reference
|
||||
- Every lifecycle action attributed to an actor with timestamp and resource reference
|
||||
- No update or delete operations on audit records
|
||||
- Every API call recorded to audit trail with method, path, actor, SHA-256 body hash, response status, and latency
|
||||
|
||||
## API Overview
|
||||
|
||||
99 endpoints under `/api/v1/` + `/.well-known/est/`, all returning JSON. List endpoints support pagination, sparse field selection (`?fields=`), sort (`?sort=-notAfter`), time-range filters, and cursor-based pagination. Full request/response schemas in the [OpenAPI 3.1 spec](api/openapi.yaml).
|
||||
|
||||
### Key Endpoints
|
||||
```
|
||||
# Certificate lifecycle
|
||||
GET /api/v1/certificates List (filter, sort, cursor, sparse fields)
|
||||
POST /api/v1/certificates/{id}/renew Trigger renewal → 202 Accepted
|
||||
POST /api/v1/certificates/{id}/revoke Revoke with RFC 5280 reason code
|
||||
GET /api/v1/certificates/{id}/export/pem Export PEM (JSON or file download)
|
||||
POST /api/v1/certificates/{id}/export/pkcs12 Export PKCS#12 bundle (no private key)
|
||||
GET /api/v1/crl/{issuer_id} DER-encoded X.509 CRL
|
||||
GET /api/v1/ocsp/{issuer_id}/{serial} OCSP responder (good/revoked/unknown)
|
||||
|
||||
# Agent operations
|
||||
POST /api/v1/agents/{id}/csr Submit CSR for issuance
|
||||
GET /api/v1/agents/{id}/work Poll for pending deployment jobs
|
||||
POST /api/v1/agents/{id}/discoveries Submit certificate discovery scan results
|
||||
|
||||
# Discovery & network scanning
|
||||
GET /api/v1/discovered-certificates List discovered certs (?agent_id, ?status)
|
||||
POST /api/v1/discovered-certificates/{id}/claim Link to managed cert
|
||||
POST /api/v1/network-scan-targets/{id}/scan Trigger immediate TLS scan
|
||||
|
||||
# Jobs & approval
|
||||
POST /api/v1/jobs/{id}/approve Approve interactive renewal
|
||||
POST /api/v1/jobs/{id}/reject Reject interactive renewal
|
||||
|
||||
# Post-deployment verification
|
||||
POST /api/v1/jobs/{id}/verify Submit TLS verification result
|
||||
GET /api/v1/jobs/{id}/verification Get verification status
|
||||
|
||||
# Observability
|
||||
GET /api/v1/metrics/prometheus Prometheus exposition format
|
||||
GET /api/v1/stats/summary Dashboard summary
|
||||
|
||||
# Digest emails (scheduled briefing)
|
||||
GET /api/v1/digest/preview HTML email preview
|
||||
POST /api/v1/digest/send Send digest immediately
|
||||
|
||||
# EST enrollment (RFC 7030)
|
||||
POST /.well-known/est/simpleenroll Device certificate enrollment
|
||||
GET /.well-known/est/cacerts CA certificate chain (PKCS#7)
|
||||
```
|
||||
|
||||
Full CRUD is available for certificates, agents, issuers, targets, teams, owners, policies, profiles, agent groups, notifications, and audit events. See the [OpenAPI spec](api/openapi.yaml) or [Feature Inventory](docs/features.md) for the complete endpoint reference.
|
||||
|
||||
## CLI
|
||||
|
||||
```bash
|
||||
# Install
|
||||
go install github.com/shankar0123/certctl/cmd/cli@latest
|
||||
|
||||
# Configure
|
||||
export CERTCTL_SERVER_URL=http://localhost:8443
|
||||
export CERTCTL_API_KEY=your-api-key
|
||||
|
||||
# Certificate commands
|
||||
certctl-cli certs list # List all certificates
|
||||
certctl-cli certs get mc-api-prod # Get certificate details
|
||||
certctl-cli certs renew mc-api-prod # Trigger renewal
|
||||
certctl-cli certs revoke mc-api-prod --reason keyCompromise
|
||||
|
||||
# Agent and job commands
|
||||
certctl-cli agents list # List registered agents
|
||||
certctl-cli jobs list # List jobs
|
||||
certctl-cli jobs cancel job-123 # Cancel a pending job
|
||||
|
||||
# Operations
|
||||
certctl-cli status # Server health + summary stats
|
||||
certctl-cli import certs.pem # Bulk import from PEM file
|
||||
|
||||
# Output formats
|
||||
certctl-cli certs list --format json # JSON output (default: table)
|
||||
```
|
||||
|
||||
## MCP Server (AI Integration)
|
||||
|
||||
certctl ships a standalone MCP (Model Context Protocol) server that exposes all 78 API endpoints as tools for AI assistants — Claude, Cursor, Windsurf, OpenClaw, VS Code Copilot, and any MCP-compatible client.
|
||||
|
||||
```bash
|
||||
# Install
|
||||
go install github.com/shankar0123/certctl/cmd/mcp-server@latest
|
||||
|
||||
# Configure
|
||||
export CERTCTL_SERVER_URL=http://localhost:8443
|
||||
export CERTCTL_API_KEY=your-api-key
|
||||
|
||||
# Run (stdio transport — add to your AI client config)
|
||||
mcp-server
|
||||
```
|
||||
|
||||
**Claude Desktop** (`claude_desktop_config.json`):
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"certctl": {
|
||||
"command": "mcp-server",
|
||||
"env": {
|
||||
"CERTCTL_SERVER_URL": "http://localhost:8443",
|
||||
"CERTCTL_API_KEY": "your-api-key"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Roadmap
|
||||
|
||||
### V1 (feature-complete → v1.0.0 tag pending)
|
||||
All nine development milestones (M1–M9) are complete. The backend covers the full certificate lifecycle: Local CA and ACME v2 issuers, NGINX/F5/IIS target connectors, threshold-based expiration alerting, agent-side ECDSA P-256 key generation, API auth with rate limiting, and a React dashboard with 11 views wired to the real API. The CI pipeline runs build, vet, test with coverage gates (service layer 30%+, handler layer 50%+), frontend type checking, Vitest test suite, and Vite production build on every push. 220+ tests total: 170+ Go tests across service, handler, integration, and connector layers, plus 53 frontend Vitest tests covering API client functions and utility helpers.
|
||||
|
||||
Remaining before the v1.0.0 tag: dashboard screenshots in README, tagged Docker images published, final error-handling audit to confirm no panics or unhandled error paths.
|
||||
### V1 (v1.0.0)
|
||||
Core lifecycle management — Local CA + ACME v2 issuers, NGINX target connector, agent-side key generation, API auth + rate limiting, React dashboard, CI pipeline with coverage gates, Docker images on GHCR.
|
||||
|
||||
### V2: Operational Maturity
|
||||
- **V2.0: Operational Workflows** — ACME DNS-01 challenges (wildcard certs, custom validation scripts), step-ca, ADCS, and OpenSSL/custom CA issuer connectors, F5 BIG-IP, IIS, Apache httpd, and HAProxy target connector implementations, agent metadata collection (OS, platform, IP, hostname via heartbeat), dynamic device grouping for policy-based targeting, crypto policy enforcement, certificate ownership tracking, renewal approval UI, bulk cert operations, deployment timeline, real-time updates (SSE/WebSocket), target config wizard
|
||||
- **V2.1: Team Adoption** — OIDC/SSO, RBAC, CLI tool, Slack/Teams/PagerDuty/OpsGenie notifiers, bulk cert import
|
||||
- **V2.2: Observability** — expiration calendar, health scores, compliance scoring, Prometheus metrics, deployment rollback
|
||||
- **V2.3: Integrations & Distribution** — MCP server (OpenClaw/Claude/Cursor), CT Log monitoring, DigiCert issuer connector, filesystem cert discovery
|
||||
|
||||
### V3: Discovery, Visibility & Cloud
|
||||
Discovery engine (passive/active scanning, cert chain validation, Nmap/Qualys import, unknown cert detection, triage workflows), cloud targets (AWS ALB, Azure Key Vault, Palo Alto, FortiGate, Citrix ADC, Kubernetes Secrets), extended issuers (Entrust, GlobalSign, Google CAS, EJBCA, Vault PKI), ServiceNow integration, Ansible module, compliance mapping docs
|
||||
30 milestones complete, 1500+ tests. See the [Feature Inventory](docs/features.md) for details on every capability.
|
||||
|
||||
### V4+: Platform & Scale
|
||||
Kubernetes CRD, Terraform provider, multi-region, HA control plane, HSM support, LDAP auth, API key scoping, multi-tenancy
|
||||
**What shipped (all ✅):**
|
||||
|
||||
- **Issuers** — Sub-CA mode (enterprise root chains), ACME DNS-01 + DNS-PERSIST-01 (wildcard certs, any DNS provider), step-ca (native /sign API), OpenSSL/Custom CA (script-based signing), ACME ARI (RFC 9702, CA-directed renewal timing)
|
||||
- **Revocation** — RFC 5280 reason codes, DER-encoded X.509 CRL, embedded OCSP responder, short-lived cert exemption
|
||||
- **Profiles + Ownership** — certificate profiles (key types, max TTL, crypto constraints), ownership tracking (owners + teams), dynamic agent groups, interactive renewal approval
|
||||
- **GUI Operations** — bulk renew/revoke/reassign, deployment timeline, inline policy editor, target wizard, audit export (CSV/JSON), short-lived credentials view
|
||||
- **Discovery** — filesystem scanning (PEM/DER) + network TLS scanning (CIDR ranges), triage workflow (claim/dismiss), network scan target management
|
||||
- **Observability** — Prometheus + JSON metrics, 5 stats API endpoints, dashboard charts (heatmap, trends, distribution), agent fleet overview, structured logging
|
||||
- **EST Server** (RFC 7030) — device/WiFi certificate enrollment, PKCS#7 wire format, configurable issuer + profile binding
|
||||
- **MCP Server** — 78 API operations as AI tools for Claude, Cursor, and any MCP-compatible client
|
||||
- **CLI** — 12 subcommands (list/get/renew/revoke certs, agents, jobs, import, status), JSON/table output
|
||||
- **Notifications** — Email (SMTP), Webhooks, Slack, Microsoft Teams, PagerDuty, OpsGenie connectors
|
||||
- **API Enhancements** — sparse fields, sort, time-range filters, cursor pagination, immutable API audit logging
|
||||
- **Compliance Mapping** — SOC 2 Type II, PCI-DSS 4.0, NIST SP 800-57 alignment guides
|
||||
|
||||
- **Post-Deployment TLS Verification** — agent-side TLS probe confirms the target is serving the correct certificate by SHA-256 fingerprint match, verification status visible in deployment timeline
|
||||
- **Traefik + Caddy Targets** — Traefik (file provider, auto-reload) and Caddy (Admin API hot-reload or file-based), both in target wizard GUI
|
||||
- **Certificate Export** — PEM (JSON or file download) and PKCS#12 formats, private keys never included (agent-side only), audit trail, GUI export buttons
|
||||
- **S/MIME Support** — EKU-aware issuance (emailProtection, codeSigning, timeStamping), adaptive KeyUsage flags, email SAN routing, EKU badges in GUI
|
||||
- **ACME ARI (RFC 9702)** — CA-directed renewal timing with graceful threshold fallback for non-ARI CAs, reduces unnecessary early renewals
|
||||
- **Scheduled Certificate Digest** — HTML email digests with certificate stats, expiration timeline, job trends, and agent health; optional daily/hourly/weekly briefings via SMTP
|
||||
- **Helm Chart** — Production-ready Kubernetes with server Deployment, PostgreSQL StatefulSet, Agent DaemonSet, security contexts, resource limits, optional Ingress, ServiceAccount
|
||||
- **ACME ARI (RFC 9702)** — CA-directed renewal timing: instead of renewing at fixed thresholds, the CA tells certctl the optimal renewal window, gracefully degrading to thresholds when ARI is unavailable
|
||||
- **Email Digest Service** — Scheduled HTML digest emails with certificate stats, expiration timeline (90d), job health, and active agent count; falls back to certificate owner emails if no recipients configured
|
||||
- **Helm Chart** — Production-ready Kubernetes deployment with server Deployment, PostgreSQL StatefulSet with PVC, Agent DaemonSet, optional Ingress, security contexts, and full values.yaml configuration
|
||||
|
||||
### V3: certctl Pro
|
||||
|
||||
Team access controls, identity provider integration, enterprise deployment targets, compliance and risk scoring, advanced fleet operations, event-driven architecture, advanced search, real-time operational views, and premium CA integrations.
|
||||
|
||||
### V4+: Cloud, Scale & Passive Discovery
|
||||
Passive network discovery (TLS listener), Kubernetes integration (cert-manager external issuer, Secrets target), cloud infrastructure targets (AWS ALB/ACM, Azure Key Vault), extended CA support (Vault PKI, Google CAS, EJBCA), and platform-scale features (Terraform provider, multi-tenancy, HSM support).
|
||||
|
||||
## License
|
||||
|
||||
Certctl is licensed under the [Business Source License 1.1](LICENSE). The source code is publicly available and free to use, modify, and self-host. The one restriction: you may not offer certctl as a managed/hosted certificate management service to third parties.
|
||||
|
||||
For licensing inquiries: certctl@proton.me
|
||||
|
||||
|
||||
@@ -0,0 +1,830 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/ecdsa"
|
||||
"crypto/elliptic"
|
||||
"crypto/rand"
|
||||
"crypto/rsa"
|
||||
"crypto/x509"
|
||||
"crypto/x509/pkix"
|
||||
"encoding/json"
|
||||
"encoding/pem"
|
||||
"io"
|
||||
"log/slog"
|
||||
"math/big"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestAgent_Heartbeat_Success tests that heartbeat sends correct metadata and handles 200 response.
|
||||
func TestAgent_Heartbeat_Success(t *testing.T) {
|
||||
// Create mock server to validate heartbeat request
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
// Verify correct endpoint and method
|
||||
if r.URL.Path != "/api/v1/agents/a-test-agent/heartbeat" {
|
||||
t.Errorf("unexpected path: %s", r.URL.Path)
|
||||
}
|
||||
if r.Method != http.MethodPost {
|
||||
t.Errorf("unexpected method: %s, expected POST", r.Method)
|
||||
}
|
||||
|
||||
// Verify auth header
|
||||
auth := r.Header.Get("Authorization")
|
||||
if auth != "Bearer test-key" {
|
||||
t.Errorf("unexpected auth header: %s", auth)
|
||||
}
|
||||
|
||||
// Verify request body contains required fields
|
||||
var payload map[string]string
|
||||
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
|
||||
t.Fatalf("failed to decode payload: %v", err)
|
||||
}
|
||||
|
||||
// Check required fields
|
||||
if _, ok := payload["version"]; !ok {
|
||||
t.Error("missing version in heartbeat")
|
||||
}
|
||||
if _, ok := payload["hostname"]; !ok {
|
||||
t.Error("missing hostname in heartbeat")
|
||||
}
|
||||
if _, ok := payload["os"]; !ok {
|
||||
t.Error("missing os in heartbeat")
|
||||
}
|
||||
if _, ok := payload["architecture"]; !ok {
|
||||
t.Error("missing architecture in heartbeat")
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test-agent",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
// Should not panic
|
||||
agent.sendHeartbeat(context.Background())
|
||||
}
|
||||
|
||||
// TestAgent_Heartbeat_ServerError tests that heartbeat handles 500 response gracefully.
|
||||
func TestAgent_Heartbeat_ServerError(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
w.Write([]byte("server error"))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test-agent",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
// Should increment consecutive failures
|
||||
failureBefore := agent.consecutiveFailures
|
||||
agent.sendHeartbeat(context.Background())
|
||||
failureAfter := agent.consecutiveFailures
|
||||
|
||||
if failureAfter != failureBefore+1 {
|
||||
t.Errorf("expected consecutive failures to increment, got %d, want %d", failureAfter, failureBefore+1)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgent_Heartbeat_ConnectionError tests that heartbeat handles connection error.
|
||||
func TestAgent_Heartbeat_ConnectionError(t *testing.T) {
|
||||
// Use an invalid address that will fail immediately
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: "http://invalid-host-that-does-not-exist.local:9999",
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test-agent",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
// Should fail due to connection error
|
||||
agent.sendHeartbeat(context.Background())
|
||||
|
||||
if agent.consecutiveFailures != 1 {
|
||||
t.Errorf("expected consecutive failures to be 1, got %d", agent.consecutiveFailures)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgent_PollWork_NoWork tests that work polling handles empty work list.
|
||||
func TestAgent_PollWork_NoWork(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/api/v1/agents/a-test-agent/work" {
|
||||
t.Errorf("unexpected path: %s", r.URL.Path)
|
||||
}
|
||||
if r.Method != http.MethodGet {
|
||||
t.Errorf("unexpected method: %s", r.Method)
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(w).Encode(WorkResponse{
|
||||
Jobs: []JobItem{},
|
||||
Count: 0,
|
||||
})
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test-agent",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
// Should not panic
|
||||
agent.pollForWork(context.Background())
|
||||
}
|
||||
|
||||
// TestAgent_PollWork_Success tests that work polling parses and returns jobs correctly.
|
||||
func TestAgent_PollWork_Success(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
|
||||
workResp := WorkResponse{
|
||||
Count: 2,
|
||||
Jobs: []JobItem{
|
||||
{
|
||||
ID: "j-csr-001",
|
||||
Type: "Issuance",
|
||||
CertificateID: "mc-001",
|
||||
CommonName: "example.com",
|
||||
SANs: []string{"www.example.com"},
|
||||
Status: "AwaitingCSR",
|
||||
},
|
||||
{
|
||||
ID: "j-deploy-001",
|
||||
Type: "Deployment",
|
||||
CertificateID: "mc-001",
|
||||
TargetID: strPtr("t-nginx-1"),
|
||||
TargetType: "NGINX",
|
||||
TargetConfig: json.RawMessage(`{"cert_path":"/etc/nginx/cert.pem"}`),
|
||||
Status: "Pending",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
json.NewEncoder(w).Encode(workResp)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test-agent",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
// Should not panic; work items are processed in separate gorines in real usage
|
||||
agent.pollForWork(context.Background())
|
||||
}
|
||||
|
||||
// TestSplitPEMChain tests PEM chain splitting into cert and chain.
|
||||
func TestSplitPEMChain(t *testing.T) {
|
||||
// Create two test certificates
|
||||
cert1, _ := generateTestCertWithCN("cert1.example.com")
|
||||
cert2, _ := generateTestCertWithCN("cert2.example.com")
|
||||
|
||||
block1 := &pem.Block{Type: "CERTIFICATE", Bytes: cert1.Raw}
|
||||
block2 := &pem.Block{Type: "CERTIFICATE", Bytes: cert2.Raw}
|
||||
|
||||
cert1PEM := string(pem.EncodeToMemory(block1))
|
||||
cert2PEM := string(pem.EncodeToMemory(block2))
|
||||
|
||||
chainPEM := cert1PEM + "\n" + cert2PEM
|
||||
|
||||
// Split
|
||||
certOnly, chain := splitPEMChain(chainPEM)
|
||||
|
||||
// Verify cert part
|
||||
if !bytes.Contains([]byte(certOnly), []byte("-----BEGIN CERTIFICATE-----")) {
|
||||
t.Error("cert part missing BEGIN marker")
|
||||
}
|
||||
|
||||
// Verify chain part
|
||||
if !bytes.Contains([]byte(chain), []byte("-----BEGIN CERTIFICATE-----")) {
|
||||
t.Error("chain part missing BEGIN marker")
|
||||
}
|
||||
|
||||
// Verify they're different
|
||||
if certOnly == chain {
|
||||
t.Error("cert and chain should be different")
|
||||
}
|
||||
}
|
||||
|
||||
// TestSplitPEMChain_SingleCert tests PEM chain splitting with single certificate.
|
||||
func TestSplitPEMChain_SingleCert(t *testing.T) {
|
||||
cert, _ := generateTestCertWithCN("example.com")
|
||||
block := &pem.Block{Type: "CERTIFICATE", Bytes: cert.Raw}
|
||||
certPEM := string(pem.EncodeToMemory(block))
|
||||
|
||||
certOnly, chain := splitPEMChain(certPEM)
|
||||
|
||||
if certOnly != certPEM {
|
||||
t.Error("single cert should be returned as-is")
|
||||
}
|
||||
if chain != "" {
|
||||
t.Error("chain should be empty for single cert")
|
||||
}
|
||||
}
|
||||
|
||||
// TestSplitPEMChain_InvalidPEM tests PEM chain splitting with invalid PEM.
|
||||
func TestSplitPEMChain_InvalidPEM(t *testing.T) {
|
||||
invalidPEM := "not a valid pem"
|
||||
|
||||
certOnly, chain := splitPEMChain(invalidPEM)
|
||||
|
||||
if certOnly != invalidPEM {
|
||||
t.Error("invalid PEM should be returned as-is in cert part")
|
||||
}
|
||||
if chain != "" {
|
||||
t.Error("chain should be empty for invalid PEM")
|
||||
}
|
||||
}
|
||||
|
||||
// TestParsePEMFile tests parsing a PEM file with certificates.
|
||||
func TestParsePEMFile(t *testing.T) {
|
||||
// Create a temporary file with a PEM certificate
|
||||
tmpdir := t.TempDir()
|
||||
certPath := filepath.Join(tmpdir, "cert.pem")
|
||||
|
||||
cert, _ := generateTestCert()
|
||||
block := &pem.Block{Type: "CERTIFICATE", Bytes: cert.Raw}
|
||||
certPEM := pem.EncodeToMemory(block)
|
||||
|
||||
if err := os.WriteFile(certPath, certPEM, 0644); err != nil {
|
||||
t.Fatalf("failed to write test cert: %v", err)
|
||||
}
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: "http://localhost:8443",
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
// Parse the file
|
||||
entries := agent.parsePEMFile(certPath)
|
||||
|
||||
if len(entries) != 1 {
|
||||
t.Errorf("expected 1 certificate, got %d", len(entries))
|
||||
return
|
||||
}
|
||||
|
||||
entry := entries[0]
|
||||
if entry.CommonName != "test.example.com" {
|
||||
t.Errorf("expected CN 'test.example.com', got '%s'", entry.CommonName)
|
||||
}
|
||||
if entry.SourceFormat != "PEM" {
|
||||
t.Errorf("expected format 'PEM', got '%s'", entry.SourceFormat)
|
||||
}
|
||||
if entry.SourcePath != certPath {
|
||||
t.Errorf("expected path '%s', got '%s'", certPath, entry.SourcePath)
|
||||
}
|
||||
|
||||
// Verify fingerprint is non-empty and correct length (SHA256 hex = 64 chars)
|
||||
if len(entry.FingerprintSHA256) != 64 {
|
||||
t.Errorf("expected 64-char fingerprint, got %d", len(entry.FingerprintSHA256))
|
||||
}
|
||||
}
|
||||
|
||||
// TestParsePEMFile_MultipleCerts tests parsing a PEM file with multiple certificates.
|
||||
func TestParsePEMFile_MultipleCerts(t *testing.T) {
|
||||
tmpdir := t.TempDir()
|
||||
certPath := filepath.Join(tmpdir, "chain.pem")
|
||||
|
||||
cert1, _ := generateTestCertWithCN("cert1.example.com")
|
||||
cert2, _ := generateTestCertWithCN("cert2.example.com")
|
||||
|
||||
block1 := &pem.Block{Type: "CERTIFICATE", Bytes: cert1.Raw}
|
||||
block2 := &pem.Block{Type: "CERTIFICATE", Bytes: cert2.Raw}
|
||||
|
||||
certPEM := append(pem.EncodeToMemory(block1), pem.EncodeToMemory(block2)...)
|
||||
|
||||
if err := os.WriteFile(certPath, certPEM, 0644); err != nil {
|
||||
t.Fatalf("failed to write test cert: %v", err)
|
||||
}
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: "http://localhost:8443",
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
entries := agent.parsePEMFile(certPath)
|
||||
|
||||
if len(entries) != 2 {
|
||||
t.Errorf("expected 2 certificates, got %d", len(entries))
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseDERFile tests parsing a DER-encoded certificate file.
|
||||
func TestParseDERFile(t *testing.T) {
|
||||
tmpdir := t.TempDir()
|
||||
derPath := filepath.Join(tmpdir, "cert.der")
|
||||
|
||||
cert, _ := generateTestCertWithCN("test.example.com")
|
||||
if err := os.WriteFile(derPath, cert.Raw, 0644); err != nil {
|
||||
t.Fatalf("failed to write test cert: %v", err)
|
||||
}
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: "http://localhost:8443",
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
entry, err := agent.parseDERFile(derPath)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if entry.CommonName != "test.example.com" {
|
||||
t.Errorf("expected CN 'test.example.com', got '%s'", entry.CommonName)
|
||||
}
|
||||
if entry.SourceFormat != "DER" {
|
||||
t.Errorf("expected format 'DER', got '%s'", entry.SourceFormat)
|
||||
}
|
||||
if len(entry.FingerprintSHA256) != 64 {
|
||||
t.Errorf("expected 64-char fingerprint, got %d", len(entry.FingerprintSHA256))
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseDERFile_Invalid tests parsing an invalid DER file.
|
||||
func TestParseDERFile_Invalid(t *testing.T) {
|
||||
tmpdir := t.TempDir()
|
||||
derPath := filepath.Join(tmpdir, "invalid.der")
|
||||
|
||||
if err := os.WriteFile(derPath, []byte("not a valid der file"), 0644); err != nil {
|
||||
t.Fatalf("failed to write test file: %v", err)
|
||||
}
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: "http://localhost:8443",
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
_, err := agent.parseDERFile(derPath)
|
||||
if err == nil {
|
||||
t.Error("expected error for invalid DER file")
|
||||
}
|
||||
}
|
||||
|
||||
// TestScanDirectory tests scanning a directory for certificate files.
|
||||
func TestScanDirectory(t *testing.T) {
|
||||
tmpdir := t.TempDir()
|
||||
|
||||
// Create subdirectory
|
||||
subdir := filepath.Join(tmpdir, "subdir")
|
||||
if err := os.MkdirAll(subdir, 0755); err != nil {
|
||||
t.Fatalf("failed to create subdir: %v", err)
|
||||
}
|
||||
|
||||
// Create certificates with various extensions
|
||||
cert1, _ := generateTestCertWithCN("cert1.example.com")
|
||||
cert2, _ := generateTestCertWithCN("cert2.example.com")
|
||||
|
||||
// Write cert1.pem
|
||||
block1 := &pem.Block{Type: "CERTIFICATE", Bytes: cert1.Raw}
|
||||
if err := os.WriteFile(filepath.Join(tmpdir, "cert1.pem"), pem.EncodeToMemory(block1), 0644); err != nil {
|
||||
t.Fatalf("failed to write cert1: %v", err)
|
||||
}
|
||||
|
||||
// Write cert2.crt in subdir
|
||||
block2 := &pem.Block{Type: "CERTIFICATE", Bytes: cert2.Raw}
|
||||
if err := os.WriteFile(filepath.Join(subdir, "cert2.crt"), pem.EncodeToMemory(block2), 0644); err != nil {
|
||||
t.Fatalf("failed to write cert2: %v", err)
|
||||
}
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: "http://localhost:8443",
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
DiscoveryDirs: []string{tmpdir},
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
// Simulate directory walk manually (as runDiscoveryScan does)
|
||||
var certs []discoveredCertEntry
|
||||
filepath.Walk(tmpdir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
ext := filepath.Ext(path)
|
||||
switch ext {
|
||||
case ".pem", ".crt":
|
||||
found := agent.parsePEMFile(path)
|
||||
certs = append(certs, found...)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if len(certs) != 2 {
|
||||
t.Errorf("expected 2 certificates from directory scan, got %d", len(certs))
|
||||
}
|
||||
}
|
||||
|
||||
// TestCreateTargetConnector_NGINX tests connector creation for NGINX target.
|
||||
func TestCreateTargetConnector_NGINX(t *testing.T) {
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: "http://localhost:8443",
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
configJSON := json.RawMessage(`{"cert_path":"/etc/nginx/cert.pem"}`)
|
||||
connector, err := agent.createTargetConnector("NGINX", configJSON)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if connector == nil {
|
||||
t.Error("expected connector to be non-nil")
|
||||
}
|
||||
}
|
||||
|
||||
// TestCreateTargetConnector_Unsupported tests connector creation for unsupported type.
|
||||
func TestCreateTargetConnector_Unsupported(t *testing.T) {
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: "http://localhost:8443",
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
_, err := agent.createTargetConnector("UnsupportedType", nil)
|
||||
|
||||
if err == nil {
|
||||
t.Error("expected error for unsupported target type")
|
||||
}
|
||||
}
|
||||
|
||||
// TestFetchCertificate_Success tests fetching a certificate from the control plane.
|
||||
func TestFetchCertificate_Success(t *testing.T) {
|
||||
cert, _ := generateTestCertWithCN("test.example.com")
|
||||
block := &pem.Block{Type: "CERTIFICATE", Bytes: cert.Raw}
|
||||
expectedCertPEM := string(pem.EncodeToMemory(block))
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/api/v1/agents/a-test/certificates/mc-001" {
|
||||
t.Errorf("unexpected path: %s", r.URL.Path)
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(w).Encode(map[string]string{
|
||||
"certificate_pem": expectedCertPEM,
|
||||
})
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
certPEM, err := agent.fetchCertificate(context.Background(), "mc-001")
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if certPEM != expectedCertPEM {
|
||||
t.Error("certificate PEM mismatch")
|
||||
}
|
||||
}
|
||||
|
||||
// TestFetchCertificate_NotFound tests fetching a non-existent certificate.
|
||||
func TestFetchCertificate_NotFound(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
w.Write([]byte("not found"))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
_, err := agent.fetchCertificate(context.Background(), "mc-nonexistent")
|
||||
if err == nil {
|
||||
t.Error("expected error for non-existent certificate")
|
||||
}
|
||||
}
|
||||
|
||||
// TestReportJobStatus_Success tests reporting job status to the control plane.
|
||||
func TestReportJobStatus_Success(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/api/v1/agents/a-test/jobs/j-001/status" {
|
||||
t.Errorf("unexpected path: %s", r.URL.Path)
|
||||
}
|
||||
if r.Method != http.MethodPost {
|
||||
t.Errorf("unexpected method: %s", r.Method)
|
||||
}
|
||||
|
||||
var payload map[string]string
|
||||
json.NewDecoder(r.Body).Decode(&payload)
|
||||
|
||||
if payload["status"] != "Completed" {
|
||||
t.Errorf("expected status 'Completed', got '%s'", payload["status"])
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
err := agent.reportJobStatus(context.Background(), "j-001", "Completed", "")
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestReportJobStatus_WithError tests reporting job status with error message.
|
||||
func TestReportJobStatus_WithError(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
var payload map[string]string
|
||||
json.NewDecoder(r.Body).Decode(&payload)
|
||||
|
||||
if payload["status"] != "Failed" {
|
||||
t.Errorf("expected status 'Failed', got '%s'", payload["status"])
|
||||
}
|
||||
if payload["error"] != "deployment failed" {
|
||||
t.Errorf("expected error 'deployment failed', got '%s'", payload["error"])
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
err := agent.reportJobStatus(context.Background(), "j-001", "Failed", "deployment failed")
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMakeRequest_Success tests making an authenticated HTTP request.
|
||||
func TestMakeRequest_Success(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
// Verify auth header
|
||||
auth := r.Header.Get("Authorization")
|
||||
if auth != "Bearer test-key" {
|
||||
t.Errorf("unexpected auth: %s", auth)
|
||||
}
|
||||
|
||||
// Verify content-type
|
||||
ct := r.Header.Get("Content-Type")
|
||||
if ct != "application/json" {
|
||||
t.Errorf("unexpected content-type: %s", ct)
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
resp, err := agent.makeRequest(context.Background(), http.MethodPost, "/test", map[string]string{"key": "value"})
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Errorf("unexpected status: %d", resp.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMakeRequest_InvalidURL tests making a request with invalid URL.
|
||||
func TestMakeRequest_InvalidURL(t *testing.T) {
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: "http://invalid-host-that-does-not-exist.local:9999",
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
_, err := agent.makeRequest(context.Background(), http.MethodGet, "/test", nil)
|
||||
if err == nil {
|
||||
t.Error("expected error for unreachable host")
|
||||
}
|
||||
}
|
||||
|
||||
// TestCertKeyInfo tests extraction of key algorithm and size from certificates.
|
||||
func TestCertKeyInfo(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
genKey func() interface{}
|
||||
expectedAlg string
|
||||
minBitSize int
|
||||
}{
|
||||
{
|
||||
name: "ECDSA P-256",
|
||||
genKey: func() interface{} {
|
||||
key, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
|
||||
return key.Public()
|
||||
},
|
||||
expectedAlg: "ECDSA",
|
||||
minBitSize: 256,
|
||||
},
|
||||
{
|
||||
name: "RSA 2048",
|
||||
genKey: func() interface{} {
|
||||
key, _ := rsa.GenerateKey(rand.Reader, 2048)
|
||||
return key.Public()
|
||||
},
|
||||
expectedAlg: "RSA",
|
||||
minBitSize: 2048,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
pubKey := tt.genKey()
|
||||
|
||||
// Create certificate with this key
|
||||
template := &x509.Certificate{
|
||||
SerialNumber: big.NewInt(1),
|
||||
Subject: pkix.Name{
|
||||
CommonName: "test.com",
|
||||
},
|
||||
NotBefore: time.Now(),
|
||||
NotAfter: time.Now().Add(24 * time.Hour),
|
||||
KeyUsage: x509.KeyUsageDigitalSignature,
|
||||
BasicConstraintsValid: true,
|
||||
}
|
||||
|
||||
var privKey interface{}
|
||||
if ecdsaPub, ok := pubKey.(*ecdsa.PublicKey); ok {
|
||||
key, _ := ecdsa.GenerateKey(ecdsaPub.Curve, rand.Reader)
|
||||
privKey = key
|
||||
} else if rsaPub, ok := pubKey.(*rsa.PublicKey); ok {
|
||||
key, _ := rsa.GenerateKey(rand.Reader, rsaPub.N.BitLen())
|
||||
privKey = key
|
||||
}
|
||||
|
||||
certDER, _ := x509.CreateCertificate(rand.Reader, template, template, pubKey, privKey)
|
||||
cert, _ := x509.ParseCertificate(certDER)
|
||||
|
||||
alg, bitSize := certKeyInfo(cert)
|
||||
if alg != tt.expectedAlg {
|
||||
t.Errorf("expected algorithm %s, got %s", tt.expectedAlg, alg)
|
||||
}
|
||||
if bitSize < tt.minBitSize {
|
||||
t.Errorf("expected bitsize >= %d, got %d", tt.minBitSize, bitSize)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestNewAgent tests agent initialization.
|
||||
func TestNewAgent(t *testing.T) {
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: "http://localhost:8443",
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
if agent.config != cfg {
|
||||
t.Error("config not set correctly")
|
||||
}
|
||||
if agent.heartbeatInterval != 60*time.Second {
|
||||
t.Errorf("expected heartbeat interval 60s, got %v", agent.heartbeatInterval)
|
||||
}
|
||||
if agent.pollInterval != 30*time.Second {
|
||||
t.Errorf("expected poll interval 30s, got %v", agent.pollInterval)
|
||||
}
|
||||
if agent.client == nil {
|
||||
t.Error("HTTP client not initialized")
|
||||
}
|
||||
}
|
||||
|
||||
// TestNewAgent_WithLogger tests agent initialization with logger.
|
||||
func TestNewAgent_WithLogger(t *testing.T) {
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: "http://localhost:8443",
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
|
||||
agent := NewAgent(cfg, logger)
|
||||
|
||||
if agent.logger != logger {
|
||||
t.Error("logger not set correctly")
|
||||
}
|
||||
}
|
||||
|
||||
// generateTestCertWithCN builds a self-signed ECDSA P-256 certificate, valid
// for 24 hours from now, whose subject common name and single DNS SAN are
// both commonName. It returns the parsed certificate, or the first error
// hit while generating the key or creating the certificate.
func generateTestCertWithCN(commonName string) (*x509.Certificate, error) {
	priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
	if err != nil {
		return nil, err
	}

	var tmpl x509.Certificate
	tmpl.SerialNumber = big.NewInt(1)
	tmpl.Subject = pkix.Name{CommonName: commonName}
	tmpl.NotBefore = time.Now()
	tmpl.NotAfter = time.Now().Add(24 * time.Hour)
	tmpl.KeyUsage = x509.KeyUsageDigitalSignature
	tmpl.BasicConstraintsValid = true
	tmpl.DNSNames = []string{commonName}

	// The template doubles as the parent, which makes the certificate self-signed.
	der, err := x509.CreateCertificate(rand.Reader, &tmpl, &tmpl, &priv.PublicKey, priv)
	if err != nil {
		return nil, err
	}
	return x509.ParseCertificate(der)
}
|
||||
|
||||
// strPtr returns a pointer to a fresh copy of s, for populating optional
// *string fields from literals.
func strPtr(s string) *string {
	p := new(string)
	*p = s
	return p
}
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"crypto/ecdsa"
|
||||
"crypto/elliptic"
|
||||
"crypto/rand"
|
||||
"crypto/rsa"
|
||||
"crypto/sha256"
|
||||
"crypto/x509"
|
||||
"crypto/x509/pkix"
|
||||
"encoding/json"
|
||||
@@ -14,32 +16,40 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/connector/target"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/apache"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/caddy"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/f5"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/haproxy"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/iis"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/nginx"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/traefik"
|
||||
)
|
||||
|
||||
// AgentConfig represents the agent-side configuration.
type AgentConfig struct {
	ServerURL     string   // Control plane server URL (e.g., http://localhost:8443)
	APIKey        string   // Agent API key for authentication
	AgentName     string   // Agent name for identification
	AgentID       string   // Agent ID for API calls (set after registration or from env)
	Hostname      string   // Server hostname
	KeyDir        string   // Directory for storing private keys (default: /var/lib/certctl/keys)
	DiscoveryDirs []string // Directories to scan for certificates (comma-separated via env)
}
|
||||
|
||||
// Agent represents the local agent that runs on target servers.
|
||||
// It periodically sends heartbeats, polls for work, and executes deployment and CSR jobs.
|
||||
// It periodically sends heartbeats, polls for work, executes deployment and CSR jobs,
|
||||
// and scans configured directories for existing certificates.
|
||||
// In agent keygen mode, private keys are generated and stored locally — they never leave
|
||||
// this process or filesystem.
|
||||
type Agent struct {
|
||||
@@ -50,6 +60,7 @@ type Agent struct {
|
||||
// Configuration
|
||||
heartbeatInterval time.Duration
|
||||
pollInterval time.Duration
|
||||
discoveryInterval time.Duration
|
||||
consecutiveFailures int
|
||||
}
|
||||
|
||||
@@ -80,6 +91,7 @@ func NewAgent(cfg *AgentConfig, logger *slog.Logger) *Agent {
|
||||
client: &http.Client{Timeout: 30 * time.Second},
|
||||
heartbeatInterval: 60 * time.Second,
|
||||
pollInterval: 30 * time.Second,
|
||||
discoveryInterval: 6 * time.Hour, // scan for certs every 6 hours
|
||||
}
|
||||
}
|
||||
|
||||
@@ -102,7 +114,7 @@ func (a *Agent) Run(ctx context.Context) error {
|
||||
a.logger.Warn("failed to enforce key directory permissions", "path", a.config.KeyDir, "error", err)
|
||||
}
|
||||
|
||||
// Create ticker channels for heartbeat and polling
|
||||
// Create ticker channels for heartbeat, polling, and discovery
|
||||
heartbeatTicker := time.NewTicker(a.heartbeatInterval)
|
||||
defer heartbeatTicker.Stop()
|
||||
|
||||
@@ -113,6 +125,22 @@ func (a *Agent) Run(ctx context.Context) error {
|
||||
a.sendHeartbeat(ctx)
|
||||
a.pollForWork(ctx)
|
||||
|
||||
// Discovery: run initial scan if directories configured, then on interval
|
||||
var discoveryTicker *time.Ticker
|
||||
if len(a.config.DiscoveryDirs) > 0 {
|
||||
a.logger.Info("certificate discovery enabled",
|
||||
"directories", a.config.DiscoveryDirs,
|
||||
"interval", a.discoveryInterval.String())
|
||||
a.runDiscoveryScan(ctx)
|
||||
discoveryTicker = time.NewTicker(a.discoveryInterval)
|
||||
defer discoveryTicker.Stop()
|
||||
} else {
|
||||
a.logger.Info("certificate discovery disabled (no CERTCTL_DISCOVERY_DIRS configured)")
|
||||
// Create a stopped ticker so the select compiles
|
||||
discoveryTicker = time.NewTicker(24 * time.Hour)
|
||||
discoveryTicker.Stop()
|
||||
}
|
||||
|
||||
// Main event loop
|
||||
for {
|
||||
select {
|
||||
@@ -135,19 +163,38 @@ func (a *Agent) Run(ctx context.Context) error {
|
||||
time.Sleep(backoff)
|
||||
}
|
||||
a.pollForWork(ctx)
|
||||
|
||||
case <-discoveryTicker.C:
|
||||
if len(a.config.DiscoveryDirs) > 0 {
|
||||
a.runDiscoveryScan(ctx)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sendHeartbeat sends a heartbeat to the control plane.
|
||||
// getOutboundIP returns the preferred outbound IP address of this machine.
//
// It opens a UDP socket towards 8.8.8.8:80 and reports the local address
// chosen for that socket. It returns the empty string when the dial fails or
// when the local address is not a UDP address.
func getOutboundIP() string {
	conn, err := net.Dial("udp", "8.8.8.8:80")
	if err != nil {
		return ""
	}
	defer conn.Close()

	// Checked assertion: an unexpected address type must not panic the agent
	// from inside a heartbeat.
	localAddr, ok := conn.LocalAddr().(*net.UDPAddr)
	if !ok || localAddr == nil {
		return ""
	}
	return localAddr.IP.String()
}
|
||||
|
||||
// sendHeartbeat sends a heartbeat to the control plane with agent metadata.
|
||||
// POST /api/v1/agents/{agentID}/heartbeat
|
||||
func (a *Agent) sendHeartbeat(ctx context.Context) {
|
||||
a.logger.Debug("sending heartbeat", "agent_id", a.config.AgentID)
|
||||
|
||||
path := fmt.Sprintf("/api/v1/agents/%s/heartbeat", a.config.AgentID)
|
||||
resp, err := a.makeRequest(ctx, http.MethodPost, path, map[string]string{
|
||||
"version": "1.0.0",
|
||||
"hostname": a.config.Hostname,
|
||||
"version": "1.0.0",
|
||||
"hostname": a.config.Hostname,
|
||||
"os": runtime.GOOS,
|
||||
"architecture": runtime.GOARCH,
|
||||
"ip_address": getOutboundIP(),
|
||||
})
|
||||
if err != nil {
|
||||
a.logger.Error("heartbeat failed", "error", err)
|
||||
@@ -297,11 +344,23 @@ func (a *Agent) executeCSRJob(ctx context.Context, job JobItem) {
|
||||
}
|
||||
|
||||
// Step 3: Create CSR with common name and SANs
|
||||
// Split SANs into DNS names and email addresses for proper CSR encoding
|
||||
var dnsNames []string
|
||||
var emailAddresses []string
|
||||
for _, san := range job.SANs {
|
||||
if strings.Contains(san, "@") {
|
||||
emailAddresses = append(emailAddresses, san)
|
||||
} else {
|
||||
dnsNames = append(dnsNames, san)
|
||||
}
|
||||
}
|
||||
|
||||
csrTemplate := &x509.CertificateRequest{
|
||||
Subject: pkix.Name{
|
||||
CommonName: job.CommonName,
|
||||
},
|
||||
DNSNames: job.SANs,
|
||||
DNSNames: dnsNames,
|
||||
EmailAddresses: emailAddresses,
|
||||
}
|
||||
|
||||
csrDER, err := x509.CreateCertificateRequest(rand.Reader, csrTemplate, privKey)
|
||||
@@ -463,6 +522,16 @@ func (a *Agent) executeDeploymentJob(ctx context.Context, job JobItem) {
|
||||
"target_type", job.TargetType,
|
||||
"success", result.Success,
|
||||
"message", result.Message)
|
||||
|
||||
// If verification is enabled, verify the deployment by probing the live TLS endpoint
|
||||
targetHost, targetPort, err := extractTargetHostAndPort(job.TargetConfig)
|
||||
if err != nil {
|
||||
a.logger.Warn("could not extract target host/port for verification",
|
||||
"job_id", job.ID,
|
||||
"error", err)
|
||||
} else {
|
||||
a.verifyAndReportDeployment(ctx, job, targetHost, targetPort, certOnly)
|
||||
}
|
||||
} else {
|
||||
a.logger.Info("no target type specified, skipping connector invocation",
|
||||
"job_id", job.ID)
|
||||
@@ -489,6 +558,24 @@ func (a *Agent) createTargetConnector(targetType string, configJSON json.RawMess
|
||||
}
|
||||
return nginx.New(&cfg, a.logger), nil
|
||||
|
||||
case "Apache":
|
||||
var cfg apache.Config
|
||||
if len(configJSON) > 0 {
|
||||
if err := json.Unmarshal(configJSON, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("invalid Apache config: %w", err)
|
||||
}
|
||||
}
|
||||
return apache.New(&cfg, a.logger), nil
|
||||
|
||||
case "HAProxy":
|
||||
var cfg haproxy.Config
|
||||
if len(configJSON) > 0 {
|
||||
if err := json.Unmarshal(configJSON, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("invalid HAProxy config: %w", err)
|
||||
}
|
||||
}
|
||||
return haproxy.New(&cfg, a.logger), nil
|
||||
|
||||
case "F5":
|
||||
var cfg f5.Config
|
||||
if len(configJSON) > 0 {
|
||||
@@ -507,6 +594,24 @@ func (a *Agent) createTargetConnector(targetType string, configJSON json.RawMess
|
||||
}
|
||||
return iis.New(&cfg, a.logger), nil
|
||||
|
||||
case "Traefik":
|
||||
var cfg traefik.Config
|
||||
if len(configJSON) > 0 {
|
||||
if err := json.Unmarshal(configJSON, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("invalid Traefik config: %w", err)
|
||||
}
|
||||
}
|
||||
return traefik.New(&cfg, a.logger), nil
|
||||
|
||||
case "Caddy":
|
||||
var cfg caddy.Config
|
||||
if len(configJSON) > 0 {
|
||||
if err := json.Unmarshal(configJSON, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("invalid Caddy config: %w", err)
|
||||
}
|
||||
}
|
||||
return caddy.New(&cfg, a.logger), nil
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported target type: %s", targetType)
|
||||
}
|
||||
@@ -616,6 +721,239 @@ func (a *Agent) makeRequest(ctx context.Context, method, path string, body inter
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// runDiscoveryScan walks configured directories, parses certificate files, and reports
|
||||
// discovered certificates to the control plane.
|
||||
// Supports PEM and DER encoded X.509 certificates.
|
||||
func (a *Agent) runDiscoveryScan(ctx context.Context) {
|
||||
a.logger.Info("starting filesystem certificate discovery scan",
|
||||
"directories", a.config.DiscoveryDirs)
|
||||
|
||||
startTime := time.Now()
|
||||
var certs []discoveredCertEntry
|
||||
var scanErrors []string
|
||||
|
||||
for _, dir := range a.config.DiscoveryDirs {
|
||||
a.logger.Debug("scanning directory", "path", dir)
|
||||
|
||||
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
scanErrors = append(scanErrors, fmt.Sprintf("walk error at %s: %v", path, err))
|
||||
return nil // continue walking
|
||||
}
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Skip files larger than 1MB (unlikely to be a certificate)
|
||||
if info.Size() > 1*1024*1024 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check file extension
|
||||
ext := strings.ToLower(filepath.Ext(path))
|
||||
switch ext {
|
||||
case ".pem", ".crt", ".cer", ".cert":
|
||||
found := a.parsePEMFile(path)
|
||||
certs = append(certs, found...)
|
||||
case ".der":
|
||||
if entry, err := a.parseDERFile(path); err == nil {
|
||||
certs = append(certs, entry)
|
||||
} else {
|
||||
a.logger.Debug("skipping non-cert DER file", "path", path, "error", err)
|
||||
}
|
||||
default:
|
||||
// Try PEM parsing for extensionless files or unknown extensions
|
||||
if ext == "" || ext == ".key" {
|
||||
return nil // skip key files and extensionless
|
||||
}
|
||||
found := a.parsePEMFile(path)
|
||||
if len(found) > 0 {
|
||||
certs = append(certs, found...)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
scanErrors = append(scanErrors, fmt.Sprintf("failed to walk %s: %v", dir, err))
|
||||
}
|
||||
}
|
||||
|
||||
scanDuration := time.Since(startTime)
|
||||
a.logger.Info("discovery scan completed",
|
||||
"certificates_found", len(certs),
|
||||
"errors", len(scanErrors),
|
||||
"duration_ms", scanDuration.Milliseconds())
|
||||
|
||||
if len(certs) == 0 && len(scanErrors) == 0 {
|
||||
a.logger.Debug("no certificates found and no errors, skipping report")
|
||||
return
|
||||
}
|
||||
|
||||
// Build report payload
|
||||
entries := make([]map[string]interface{}, len(certs))
|
||||
for i, c := range certs {
|
||||
entries[i] = map[string]interface{}{
|
||||
"fingerprint_sha256": c.FingerprintSHA256,
|
||||
"common_name": c.CommonName,
|
||||
"sans": c.SANs,
|
||||
"serial_number": c.SerialNumber,
|
||||
"issuer_dn": c.IssuerDN,
|
||||
"subject_dn": c.SubjectDN,
|
||||
"not_before": c.NotBefore,
|
||||
"not_after": c.NotAfter,
|
||||
"key_algorithm": c.KeyAlgorithm,
|
||||
"key_size": c.KeySize,
|
||||
"is_ca": c.IsCA,
|
||||
"pem_data": c.PEMData,
|
||||
"source_path": c.SourcePath,
|
||||
"source_format": c.SourceFormat,
|
||||
}
|
||||
}
|
||||
|
||||
report := map[string]interface{}{
|
||||
"agent_id": a.config.AgentID,
|
||||
"directories": a.config.DiscoveryDirs,
|
||||
"certificates": entries,
|
||||
"errors": scanErrors,
|
||||
"scan_duration_ms": int(scanDuration.Milliseconds()),
|
||||
}
|
||||
|
||||
// Submit to control plane
|
||||
path := fmt.Sprintf("/api/v1/agents/%s/discoveries", a.config.AgentID)
|
||||
resp, err := a.makeRequest(ctx, http.MethodPost, path, report)
|
||||
if err != nil {
|
||||
a.logger.Error("failed to submit discovery report", "error", err)
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusAccepted {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
a.logger.Error("discovery report rejected",
|
||||
"status", resp.StatusCode,
|
||||
"body", string(body))
|
||||
return
|
||||
}
|
||||
|
||||
a.logger.Info("discovery report submitted successfully",
|
||||
"certificates", len(certs),
|
||||
"errors", len(scanErrors))
|
||||
}
|
||||
|
||||
// discoveredCertEntry is the per-certificate record produced by a discovery
// scan. Its JSON tags give the field names used when an entry is serialized.
type discoveredCertEntry struct {
	// Identity of the certificate.
	FingerprintSHA256 string   `json:"fingerprint_sha256"` // hex SHA-256 of the raw certificate
	CommonName        string   `json:"common_name"`        // subject common name
	SANs              []string `json:"sans"`               // DNS subject alternative names
	SerialNumber      string   `json:"serial_number"`      // serial number in base 16
	IssuerDN          string   `json:"issuer_dn"`          // issuer distinguished name
	SubjectDN         string   `json:"subject_dn"`         // subject distinguished name

	// Validity window, formatted as RFC 3339 timestamps in UTC.
	NotBefore string `json:"not_before"`
	NotAfter  string `json:"not_after"`

	// Public key details and CA flag.
	KeyAlgorithm string `json:"key_algorithm"`
	KeySize      int    `json:"key_size"`
	IsCA         bool   `json:"is_ca"`

	// Where the certificate came from and its PEM encoding.
	PEMData      string `json:"pem_data"`
	SourcePath   string `json:"source_path"`   // file the certificate was read from
	SourceFormat string `json:"source_format"` // "PEM" or "DER"
}
|
||||
|
||||
// parsePEMFile reads a file and extracts all X.509 certificates from PEM blocks.
|
||||
func (a *Agent) parsePEMFile(path string) []discoveredCertEntry {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
a.logger.Debug("failed to read file", "path", path, "error", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
var entries []discoveredCertEntry
|
||||
rest := data
|
||||
for {
|
||||
var block *pem.Block
|
||||
block, rest = pem.Decode(rest)
|
||||
if block == nil {
|
||||
break
|
||||
}
|
||||
if block.Type != "CERTIFICATE" {
|
||||
continue
|
||||
}
|
||||
cert, err := x509.ParseCertificate(block.Bytes)
|
||||
if err != nil {
|
||||
a.logger.Debug("failed to parse certificate in PEM", "path", path, "error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
pemStr := string(pem.EncodeToMemory(block))
|
||||
entries = append(entries, certToEntry(cert, path, "PEM", pemStr))
|
||||
}
|
||||
return entries
|
||||
}
|
||||
|
||||
// parseDERFile reads a DER-encoded certificate file.
|
||||
func (a *Agent) parseDERFile(path string) (discoveredCertEntry, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return discoveredCertEntry{}, fmt.Errorf("read failed: %w", err)
|
||||
}
|
||||
|
||||
cert, err := x509.ParseCertificate(data)
|
||||
if err != nil {
|
||||
return discoveredCertEntry{}, fmt.Errorf("parse failed: %w", err)
|
||||
}
|
||||
|
||||
// Convert to PEM for storage
|
||||
pemStr := string(pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: data}))
|
||||
return certToEntry(cert, path, "DER", pemStr), nil
|
||||
}
|
||||
|
||||
// certToEntry converts a parsed x509.Certificate into a discoveredCertEntry.
|
||||
func certToEntry(cert *x509.Certificate, path, format, pemData string) discoveredCertEntry {
|
||||
// Compute SHA-256 fingerprint
|
||||
fingerprint := fmt.Sprintf("%x", sha256Sum(cert.Raw))
|
||||
|
||||
// Determine key algorithm and size
|
||||
keyAlg, keySize := certKeyInfo(cert)
|
||||
|
||||
return discoveredCertEntry{
|
||||
FingerprintSHA256: fingerprint,
|
||||
CommonName: cert.Subject.CommonName,
|
||||
SANs: cert.DNSNames,
|
||||
SerialNumber: cert.SerialNumber.Text(16),
|
||||
IssuerDN: cert.Issuer.String(),
|
||||
SubjectDN: cert.Subject.String(),
|
||||
NotBefore: cert.NotBefore.UTC().Format(time.RFC3339),
|
||||
NotAfter: cert.NotAfter.UTC().Format(time.RFC3339),
|
||||
KeyAlgorithm: keyAlg,
|
||||
KeySize: keySize,
|
||||
IsCA: cert.IsCA,
|
||||
PEMData: pemData,
|
||||
SourcePath: path,
|
||||
SourceFormat: format,
|
||||
}
|
||||
}
|
||||
|
||||
// sha256Sum computes the SHA-256 digest of data and returns it as a
// fixed-size 32-byte array.
func sha256Sum(data []byte) [32]byte {
	digest := sha256.Sum256(data)
	return digest
}
|
||||
|
||||
// certKeyInfo reports the public key algorithm name and key size in bits for
// cert. ECDSA keys report the curve's bit size and RSA keys the modulus bit
// length. For any other key type, Ed25519 certificates report 256 and all
// remaining algorithms report their x509 algorithm name with size 0.
func certKeyInfo(cert *x509.Certificate) (string, int) {
	// Key types whose size can be read directly from the parsed key.
	switch key := cert.PublicKey.(type) {
	case *rsa.PublicKey:
		return "RSA", key.N.BitLen()
	case *ecdsa.PublicKey:
		return "ECDSA", key.Curve.Params().BitSize
	}

	// Otherwise fall back to the algorithm recorded in the certificate.
	if cert.PublicKeyAlgorithm == x509.Ed25519 {
		return "Ed25519", 256
	}
	return cert.PublicKeyAlgorithm.String(), 0
}
|
||||
|
||||
func main() {
|
||||
// Parse command-line flags (with env var fallbacks for Docker deployment)
|
||||
serverURL := flag.String("server", getEnvDefault("CERTCTL_SERVER_URL", "http://localhost:8443"), "Control plane server URL")
|
||||
@@ -623,6 +961,7 @@ func main() {
|
||||
agentName := flag.String("name", getEnvDefault("CERTCTL_AGENT_NAME", "certctl-agent"), "Agent name")
|
||||
agentID := flag.String("agent-id", getEnvDefault("CERTCTL_AGENT_ID", ""), "Agent ID (from registration)")
|
||||
keyDir := flag.String("key-dir", getEnvDefault("CERTCTL_KEY_DIR", "/var/lib/certctl/keys"), "Directory for storing private keys")
|
||||
discoveryDirsStr := flag.String("discovery-dirs", getEnvDefault("CERTCTL_DISCOVERY_DIRS", ""), "Comma-separated directories to scan for certificates")
|
||||
flag.Parse()
|
||||
|
||||
if *apiKey == "" {
|
||||
@@ -651,14 +990,26 @@ func main() {
|
||||
hostname = "unknown"
|
||||
}
|
||||
|
||||
// Parse discovery directories
|
||||
var discoveryDirs []string
|
||||
if *discoveryDirsStr != "" {
|
||||
for _, d := range strings.Split(*discoveryDirsStr, ",") {
|
||||
d = strings.TrimSpace(d)
|
||||
if d != "" {
|
||||
discoveryDirs = append(discoveryDirs, d)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create agent configuration
|
||||
agentCfg := &AgentConfig{
|
||||
ServerURL: *serverURL,
|
||||
APIKey: *apiKey,
|
||||
AgentName: *agentName,
|
||||
AgentID: *agentID,
|
||||
Hostname: hostname,
|
||||
KeyDir: *keyDir,
|
||||
ServerURL: *serverURL,
|
||||
APIKey: *apiKey,
|
||||
AgentName: *agentName,
|
||||
AgentID: *agentID,
|
||||
Hostname: hostname,
|
||||
KeyDir: *keyDir,
|
||||
DiscoveryDirs: discoveryDirs,
|
||||
}
|
||||
|
||||
// Create and start agent
|
||||
|
||||
@@ -0,0 +1,285 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"encoding/json"
|
||||
"encoding/pem"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// verifyDeployment probes the live TLS endpoint for a deployment target and verifies
|
||||
// that the deployed certificate matches what we expect.
|
||||
//
|
||||
// Parameters:
|
||||
// - targetHost: the hostname or IP of the target (extracted from target config)
|
||||
// - targetPort: the TLS port of the target (e.g., 443)
|
||||
// - expectedCertPEM: the PEM-encoded certificate that was deployed
|
||||
// - delay: wait time before probing (e.g., 2 seconds for reload to take effect)
|
||||
// - timeout: overall timeout for TLS connection attempt (e.g., 10 seconds)
|
||||
//
|
||||
// Returns:
|
||||
// - A VerificationResult if probing succeeded (even if cert doesn't match)
|
||||
// - An error if the probe itself failed (network error, timeout, etc.)
|
||||
//
|
||||
// The function compares the SHA-256 fingerprints of the expected and actual certificates.
|
||||
// If the certificate served at the endpoint differs, Verified will be false but no error
|
||||
// is returned — this is an expected verification failure, not a probe failure.
|
||||
func verifyDeployment(
|
||||
ctx context.Context,
|
||||
targetHost string,
|
||||
targetPort int,
|
||||
expectedCertPEM string,
|
||||
delay time.Duration,
|
||||
timeout time.Duration,
|
||||
logger *slog.Logger,
|
||||
) (*VerificationResult, error) {
|
||||
// Wait for reload to take effect
|
||||
if delay > 0 {
|
||||
select {
|
||||
case <-time.After(delay):
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
// Parse expected certificate to compute its fingerprint
|
||||
expectedFp, err := computeCertificateFingerprint(expectedCertPEM)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse expected certificate: %w", err)
|
||||
}
|
||||
|
||||
// Connect to the target's TLS endpoint
|
||||
address := fmt.Sprintf("%s:%d", targetHost, targetPort)
|
||||
if logger != nil {
|
||||
logger.Debug("probing TLS endpoint for verification",
|
||||
"address", address,
|
||||
"expected_fingerprint", expectedFp)
|
||||
}
|
||||
|
||||
dialer := &net.Dialer{Timeout: timeout}
|
||||
conn, err := tls.DialWithDialer(dialer, "tcp", address, &tls.Config{
|
||||
// SECURITY NOTE: InsecureSkipVerify is intentionally set to true here.
|
||||
// Post-deployment verification must probe the live endpoint to extract and
|
||||
// compare the served certificate fingerprint, regardless of its validity
|
||||
// state (expired, self-signed, internal CA, etc.). This setting is scoped
|
||||
// to verification probing only — it is NEVER used for control-plane API
|
||||
// calls, issuer connector communication, or any operation that trusts the
|
||||
// certificate. The verification result compares SHA-256 fingerprints only.
|
||||
// See TICKET-016 for full security audit rationale.
|
||||
InsecureSkipVerify: true,
|
||||
ServerName: targetHost, // For SNI
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to connect to %s: %w", address, err)
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
// Extract the leaf certificate from the TLS connection
|
||||
state := conn.ConnectionState()
|
||||
if len(state.PeerCertificates) == 0 {
|
||||
return nil, fmt.Errorf("no certificates presented by %s", address)
|
||||
}
|
||||
|
||||
leafCert := state.PeerCertificates[0]
|
||||
actualFp := fmt.Sprintf("%x", sha256.Sum256(leafCert.Raw))
|
||||
|
||||
if logger != nil {
|
||||
logger.Debug("received certificate from endpoint",
|
||||
"address", address,
|
||||
"cn", leafCert.Subject.CommonName,
|
||||
"actual_fingerprint", actualFp)
|
||||
}
|
||||
|
||||
// Compare fingerprints
|
||||
verified := actualFp == expectedFp
|
||||
if logger != nil {
|
||||
if !verified {
|
||||
logger.Warn("certificate fingerprint mismatch at endpoint",
|
||||
"address", address,
|
||||
"expected_fingerprint", expectedFp,
|
||||
"actual_fingerprint", actualFp)
|
||||
} else {
|
||||
logger.Info("certificate verification succeeded",
|
||||
"address", address,
|
||||
"fingerprint", actualFp)
|
||||
}
|
||||
}
|
||||
|
||||
return &VerificationResult{
|
||||
ExpectedFingerprint: expectedFp,
|
||||
ActualFingerprint: actualFp,
|
||||
Verified: verified,
|
||||
VerifiedAt: time.Now().UTC(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// VerificationResult represents the outcome of verifying a deployed certificate.
type VerificationResult struct {
	// ExpectedFingerprint is the hex SHA-256 fingerprint of the certificate
	// that was supposed to be deployed.
	ExpectedFingerprint string `json:"expected_fingerprint"`
	// ActualFingerprint is the hex SHA-256 fingerprint of the leaf certificate
	// the probed endpoint presented.
	ActualFingerprint string `json:"actual_fingerprint"`
	// Verified is true when the expected and actual fingerprints are equal.
	Verified bool `json:"verified"`
	// VerifiedAt is the UTC time at which the result was produced.
	VerifiedAt time.Time `json:"verified_at"`
	// Error carries the probe failure text, if any; it is omitted from JSON
	// when empty.
	Error string `json:"error,omitempty"`
}
|
||||
|
||||
// computeCertificateFingerprint returns the lowercase hex SHA-256 digest of the
// DER encoding of the first PEM block in certPEM. It fails when no PEM block
// can be decoded or when the block is not a parseable X.509 certificate.
func computeCertificateFingerprint(certPEM string) (string, error) {
	pemBlock, _ := pem.Decode([]byte(certPEM))
	if pemBlock == nil {
		return "", fmt.Errorf("failed to decode PEM certificate")
	}

	parsed, parseErr := x509.ParseCertificate(pemBlock.Bytes)
	if parseErr != nil {
		return "", fmt.Errorf("failed to parse x509 certificate: %w", parseErr)
	}

	digest := sha256.Sum256(parsed.Raw)
	return fmt.Sprintf("%x", digest), nil
}
|
||||
|
||||
// reportVerificationResult submits the verification result back to the control plane.
|
||||
// This is a best-effort operation — a failure to report doesn't block agent progress.
|
||||
func (a *Agent) reportVerificationResult(
|
||||
ctx context.Context,
|
||||
jobID string,
|
||||
targetID string,
|
||||
result *VerificationResult,
|
||||
) error {
|
||||
if jobID == "" || targetID == "" || result == nil {
|
||||
return fmt.Errorf("missing required fields for verification report")
|
||||
}
|
||||
|
||||
// Build the request payload
|
||||
payload := map[string]interface{}{
|
||||
"target_id": targetID,
|
||||
"expected_fingerprint": result.ExpectedFingerprint,
|
||||
"actual_fingerprint": result.ActualFingerprint,
|
||||
"verified": result.Verified,
|
||||
"error": result.Error,
|
||||
}
|
||||
|
||||
body, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal verification result: %w", err)
|
||||
}
|
||||
|
||||
// POST to /api/v1/jobs/{id}/verify
|
||||
url := fmt.Sprintf("%s/api/v1/jobs/%s/verify", a.config.ServerURL, jobID)
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create verification request: %w", err)
|
||||
}
|
||||
|
||||
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", a.config.APIKey))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := a.client.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to send verification result: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Check response status
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
bodyBytes, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("verification reporting failed with status %d: %s", resp.StatusCode, string(bodyBytes))
|
||||
}
|
||||
|
||||
if a.logger != nil {
|
||||
a.logger.Debug("verification result reported to control plane",
|
||||
"job_id", jobID,
|
||||
"verified", result.Verified)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// extractTargetHostAndPort extracts the host and port from target configuration.
//
// The host is taken from the first non-empty string among the "host",
// "hostname", "target" and "address" fields, in that order. The port comes from
// a numeric "port" field and defaults to 443 when that field is absent or not a
// JSON number. An error is returned for invalid JSON, a missing host, or a port
// that is not a whole number in the range 1-65535.
func extractTargetHostAndPort(configJSON json.RawMessage) (string, int, error) {
	var config map[string]interface{}
	if err := json.Unmarshal(configJSON, &config); err != nil {
		return "", 0, fmt.Errorf("invalid target config JSON: %w", err)
	}

	// Try common field names for hostname.
	var host string
	for _, key := range []string{"host", "hostname", "target", "address"} {
		if h, ok := config[key].(string); ok && h != "" {
			host = h
			break
		}
	}
	if host == "" {
		return "", 0, fmt.Errorf("target config missing host/hostname field")
	}

	// Default to 443 unless a numeric port is configured.
	port := 443
	if p, ok := config["port"].(float64); ok {
		// Validate on the float before converting: the range check comes first
		// so int(p) is only evaluated for values that fit, and the round-trip
		// comparison rejects fractional ports such as 443.5 instead of
		// silently truncating them.
		if p < 1 || p > 65535 || p != float64(int(p)) {
			return "", 0, fmt.Errorf("invalid port: %v", p)
		}
		port = int(p)
	}

	return host, port, nil
}
|
||||
|
||||
// verifyAndReportDeployment performs TLS endpoint verification and reports the result.
|
||||
// This is a best-effort operation — failures are logged but don't affect deployment status.
|
||||
func (a *Agent) verifyAndReportDeployment(
|
||||
ctx context.Context,
|
||||
job JobItem,
|
||||
targetHost string,
|
||||
targetPort int,
|
||||
certPEM string,
|
||||
) {
|
||||
// Perform verification with configured timeout and delay
|
||||
result, err := verifyDeployment(ctx, targetHost, targetPort, certPEM,
|
||||
2*time.Second, // delay before probing
|
||||
10*time.Second, // timeout for TLS connection
|
||||
a.logger)
|
||||
|
||||
if err != nil {
|
||||
if a.logger != nil {
|
||||
a.logger.Warn("verification probe failed",
|
||||
"job_id", job.ID,
|
||||
"target_host", targetHost,
|
||||
"target_port", targetPort,
|
||||
"error", err)
|
||||
}
|
||||
// Probe failure: report error but continue
|
||||
result = &VerificationResult{
|
||||
Error: err.Error(),
|
||||
VerifiedAt: time.Now().UTC(),
|
||||
}
|
||||
}
|
||||
|
||||
// Report result to control plane
|
||||
if job.TargetID == nil {
|
||||
if a.logger != nil {
|
||||
a.logger.Warn("cannot report verification: target_id is nil", "job_id", job.ID)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if err := a.reportVerificationResult(ctx, job.ID, *job.TargetID, result); err != nil {
|
||||
if a.logger != nil {
|
||||
a.logger.Warn("failed to report verification result",
|
||||
"job_id", job.ID,
|
||||
"error", err)
|
||||
}
|
||||
// Non-blocking: continue even if report fails
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,431 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/ecdsa"
|
||||
"crypto/elliptic"
|
||||
"crypto/rand"
|
||||
"crypto/x509"
|
||||
"crypto/x509/pkix"
|
||||
"encoding/json"
|
||||
"encoding/pem"
|
||||
"fmt"
|
||||
"math/big"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestComputeCertificateFingerprint(t *testing.T) {
|
||||
// Generate a test certificate for fingerprint validation
|
||||
cert, err := generateTestCert()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to generate test cert: %v", err)
|
||||
}
|
||||
|
||||
certPEM := string(pem.EncodeToMemory(&pem.Block{
|
||||
Type: "CERTIFICATE",
|
||||
Bytes: cert.Raw,
|
||||
}))
|
||||
|
||||
fp, err := computeCertificateFingerprint(certPEM)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if len(fp) != 64 { // SHA256 hex = 64 chars
|
||||
t.Errorf("expected 64 char fingerprint, got %d", len(fp))
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeCertificateFingerprint_InvalidPEM(t *testing.T) {
|
||||
_, err := computeCertificateFingerprint("not a valid pem")
|
||||
if err == nil {
|
||||
t.Error("expected error for invalid PEM")
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeCertificateFingerprint_EmptyString(t *testing.T) {
|
||||
_, err := computeCertificateFingerprint("")
|
||||
if err == nil {
|
||||
t.Error("expected error for empty string")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractTargetHostAndPort_ValidConfig(t *testing.T) {
|
||||
config := map[string]interface{}{
|
||||
"host": "example.com",
|
||||
"port": 443.0,
|
||||
}
|
||||
configJSON, _ := json.Marshal(config)
|
||||
|
||||
host, port, err := extractTargetHostAndPort(configJSON)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if host != "example.com" {
|
||||
t.Errorf("expected host example.com, got %s", host)
|
||||
}
|
||||
if port != 443 {
|
||||
t.Errorf("expected port 443, got %d", port)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractTargetHostAndPort_DefaultPort(t *testing.T) {
|
||||
config := map[string]interface{}{
|
||||
"hostname": "test.local",
|
||||
}
|
||||
configJSON, _ := json.Marshal(config)
|
||||
|
||||
host, port, err := extractTargetHostAndPort(configJSON)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if host != "test.local" {
|
||||
t.Errorf("expected host test.local, got %s", host)
|
||||
}
|
||||
if port != 443 {
|
||||
t.Errorf("expected default port 443, got %d", port)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractTargetHostAndPort_MissingHost(t *testing.T) {
|
||||
config := map[string]interface{}{
|
||||
"port": 443.0,
|
||||
}
|
||||
configJSON, _ := json.Marshal(config)
|
||||
|
||||
_, _, err := extractTargetHostAndPort(configJSON)
|
||||
if err == nil {
|
||||
t.Error("expected error for missing host")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractTargetHostAndPort_InvalidJSON(t *testing.T) {
|
||||
configJSON := []byte("invalid json{")
|
||||
|
||||
_, _, err := extractTargetHostAndPort(configJSON)
|
||||
if err == nil {
|
||||
t.Error("expected error for invalid JSON")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractTargetHostAndPort_AlternativeFieldNames(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
config map[string]interface{}
|
||||
expected string
|
||||
}{
|
||||
{"host", map[string]interface{}{"host": "host1.com"}, "host1.com"},
|
||||
{"hostname", map[string]interface{}{"hostname": "host2.com"}, "host2.com"},
|
||||
{"target", map[string]interface{}{"target": "host3.com"}, "host3.com"},
|
||||
{"address", map[string]interface{}{"address": "host4.com"}, "host4.com"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
configJSON, _ := json.Marshal(tt.config)
|
||||
host, _, err := extractTargetHostAndPort(configJSON)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if host != tt.expected {
|
||||
t.Errorf("expected %s, got %s", tt.expected, host)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyDeployment_Timeout(t *testing.T) {
|
||||
cert, _ := generateTestCert()
|
||||
certPEM := string(pem.EncodeToMemory(&pem.Block{
|
||||
Type: "CERTIFICATE",
|
||||
Bytes: cert.Raw,
|
||||
}))
|
||||
|
||||
ctx := context.Background()
|
||||
result, err := verifyDeployment(ctx, "192.0.2.1", 443, certPEM, 0, 100*time.Millisecond, nil)
|
||||
|
||||
// Connection to reserved test IP should timeout or fail
|
||||
if err == nil && result == nil {
|
||||
t.Error("expected error or result for unreachable host")
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyDeployment_InvalidCertPEM(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
result, err := verifyDeployment(ctx, "localhost", 443, "not a cert", 0, 5*time.Second, nil)
|
||||
|
||||
if err == nil {
|
||||
t.Error("expected error for invalid certificate PEM")
|
||||
}
|
||||
if result != nil {
|
||||
t.Error("expected no result on error")
|
||||
}
|
||||
}
|
||||
|
||||
// generateTestCert builds a self-signed ECDSA P-256 certificate for
// "test.example.com", valid for 24 hours from now, and returns it parsed.
func generateTestCert() (*x509.Certificate, error) {
	privKey, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
	if err != nil {
		return nil, err
	}

	tmpl := x509.Certificate{
		SerialNumber:          big.NewInt(1),
		Subject:               pkix.Name{CommonName: "test.example.com"},
		NotBefore:             time.Now(),
		NotAfter:              time.Now().Add(24 * time.Hour),
		KeyUsage:              x509.KeyUsageDigitalSignature,
		BasicConstraintsValid: true,
		DNSNames:              []string{"test.example.com"},
	}

	// Template and parent are the same certificate, i.e. it is self-signed.
	der, err := x509.CreateCertificate(rand.Reader, &tmpl, &tmpl, &privKey.PublicKey, privKey)
	if err != nil {
		return nil, err
	}

	return x509.ParseCertificate(der)
}
|
||||
|
||||
func TestReportVerificationResult_Success(t *testing.T) {
|
||||
// Create mock HTTP server
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/api/v1/jobs/j-test/verify" {
|
||||
t.Errorf("unexpected path: %s", r.URL.Path)
|
||||
}
|
||||
if r.Method != "POST" {
|
||||
t.Errorf("unexpected method: %s", r.Method)
|
||||
}
|
||||
|
||||
// Check auth header
|
||||
auth := r.Header.Get("Authorization")
|
||||
if auth != "Bearer test-api-key" {
|
||||
t.Errorf("unexpected auth header: %s", auth)
|
||||
}
|
||||
|
||||
// Verify request body
|
||||
var payload map[string]interface{}
|
||||
json.NewDecoder(r.Body).Decode(&payload)
|
||||
if payload["verified"] != true {
|
||||
t.Error("expected verified to be true")
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(w).Encode(map[string]interface{}{
|
||||
"job_id": "j-test",
|
||||
"verified": true,
|
||||
})
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-api-key",
|
||||
}
|
||||
agent := NewAgent(cfg, nil)
|
||||
|
||||
result := &VerificationResult{
|
||||
ExpectedFingerprint: "abc123",
|
||||
ActualFingerprint: "abc123",
|
||||
Verified: true,
|
||||
VerifiedAt: time.Now().UTC(),
|
||||
}
|
||||
|
||||
err := agent.reportVerificationResult(context.Background(), "j-test", "t-nginx1", result)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReportVerificationResult_MissingFields(t *testing.T) {
|
||||
agent := NewAgent(&AgentConfig{}, nil)
|
||||
|
||||
result := &VerificationResult{
|
||||
Verified: true,
|
||||
VerifiedAt: time.Now().UTC(),
|
||||
}
|
||||
|
||||
err := agent.reportVerificationResult(context.Background(), "", "t-nginx1", result)
|
||||
if err == nil {
|
||||
t.Error("expected error for missing job ID")
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyDeployment_ContextCancellation(t *testing.T) {
|
||||
cert, _ := generateTestCert()
|
||||
certPEM := string(pem.EncodeToMemory(&pem.Block{
|
||||
Type: "CERTIFICATE",
|
||||
Bytes: cert.Raw,
|
||||
}))
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // Cancel immediately
|
||||
|
||||
result, err := verifyDeployment(ctx, "localhost", 443, certPEM, 1*time.Second, 5*time.Second, nil)
|
||||
|
||||
if err == nil {
|
||||
t.Error("expected error for cancelled context")
|
||||
}
|
||||
if result != nil {
|
||||
t.Error("expected no result on context cancellation")
|
||||
}
|
||||
}
|
||||
|
||||
// Placeholder helper for future TLS verification integration tests; it is
// assigned to the blank identifier, so nothing calls it yet.
// NOTE(review): it currently opens a plain TCP listener and does not use cert.
var _ = func(t *testing.T, cert *x509.Certificate) (string, func()) {
	ln, listenErr := net.Listen("tcp", "127.0.0.1:0")
	if listenErr != nil {
		t.Fatalf("failed to create listener: %v", listenErr)
	}

	// Accept one connection and block on a single read so it stays open.
	go func() {
		c, acceptErr := ln.Accept()
		if acceptErr != nil {
			return
		}
		defer c.Close()
		scratch := make([]byte, 1024)
		c.Read(scratch) //nolint:errcheck
	}()

	stop := func() {
		ln.Close()
	}

	return ln.Addr().String(), stop
}
|
||||
|
||||
func TestVerificationResult_JSONMarshaling(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
result := &VerificationResult{
|
||||
ExpectedFingerprint: "abc123",
|
||||
ActualFingerprint: "def456",
|
||||
Verified: false,
|
||||
VerifiedAt: now,
|
||||
Error: "fingerprint mismatch",
|
||||
}
|
||||
|
||||
data, err := json.Marshal(result)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error marshaling: %v", err)
|
||||
}
|
||||
|
||||
var unmarshaled VerificationResult
|
||||
err = json.Unmarshal(data, &unmarshaled)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error unmarshaling: %v", err)
|
||||
}
|
||||
|
||||
if unmarshaled.Error != "fingerprint mismatch" {
|
||||
t.Errorf("error mismatch: got %s", unmarshaled.Error)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReportVerificationResult_ServerError(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
w.Write([]byte("server error"))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
cfg := &AgentConfig{
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-api-key",
|
||||
}
|
||||
agent := NewAgent(cfg, nil)
|
||||
|
||||
result := &VerificationResult{
|
||||
ExpectedFingerprint: "abc123",
|
||||
ActualFingerprint: "abc123",
|
||||
Verified: true,
|
||||
VerifiedAt: time.Now().UTC(),
|
||||
}
|
||||
|
||||
err := agent.reportVerificationResult(context.Background(), "j-test", "t-nginx1", result)
|
||||
if err == nil {
|
||||
t.Error("expected error for server error response")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractTargetHostAndPort_InvalidPort(t *testing.T) {
|
||||
config := map[string]interface{}{
|
||||
"host": "example.com",
|
||||
"port": 99999.0,
|
||||
}
|
||||
configJSON, _ := json.Marshal(config)
|
||||
|
||||
_, _, err := extractTargetHostAndPort(configJSON)
|
||||
if err == nil {
|
||||
t.Error("expected error for invalid port")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractTargetHostAndPort_ZeroPort(t *testing.T) {
|
||||
config := map[string]interface{}{
|
||||
"host": "example.com",
|
||||
"port": 0.0,
|
||||
}
|
||||
configJSON, _ := json.Marshal(config)
|
||||
|
||||
_, _, err := extractTargetHostAndPort(configJSON)
|
||||
if err == nil {
|
||||
t.Error("expected error for zero port")
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyDeployment_FingerprintComparison(t *testing.T) {
|
||||
// Create a simple TLS server for testing
|
||||
server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
// Get the server's TLS certificate from TLS config
|
||||
if len(server.TLS.Certificates) == 0 {
|
||||
t.Skip("no TLS certificates configured on test server")
|
||||
}
|
||||
|
||||
// Parse the leaf certificate from the DER bytes
|
||||
leafDER := server.TLS.Certificates[0].Certificate[0]
|
||||
leafCert, err := x509.ParseCertificate(leafDER)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse test server certificate: %v", err)
|
||||
}
|
||||
|
||||
certPEM := string(pem.EncodeToMemory(&pem.Block{
|
||||
Type: "CERTIFICATE",
|
||||
Bytes: leafCert.Raw,
|
||||
}))
|
||||
|
||||
// Get host and port from the listener address
|
||||
addr := server.Listener.Addr().String()
|
||||
host, portStr, err := net.SplitHostPort(addr)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse server address: %v", err)
|
||||
}
|
||||
port := 0
|
||||
fmt.Sscanf(portStr, "%d", &port)
|
||||
|
||||
// Verify deployment against the live TLS server
|
||||
ctx := context.Background()
|
||||
result, _ := verifyDeployment(ctx, host, port, certPEM, 0, 5*time.Second, nil)
|
||||
|
||||
// This test may fail in some environments due to TLS setup complexity
|
||||
// The key is testing the fingerprint comparison logic
|
||||
if result != nil {
|
||||
if result.Verified && result.ExpectedFingerprint != result.ActualFingerprint {
|
||||
t.Error("fingerprint mismatch: expected and actual should match if Verified is true")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,203 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/cli"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Parse global flags
|
||||
fs := flag.NewFlagSet("certctl-cli", flag.ExitOnError)
|
||||
fs.Usage = func() {
|
||||
fmt.Fprintf(os.Stderr, `certctl-cli — CLI for certificate lifecycle management
|
||||
|
||||
Usage:
|
||||
certctl-cli [global flags] <command> [command flags]
|
||||
|
||||
Global flags:
|
||||
`)
|
||||
fs.PrintDefaults()
|
||||
fmt.Fprintf(os.Stderr, `
|
||||
Commands:
|
||||
certs list List certificates
|
||||
certs get ID Get certificate details
|
||||
certs renew ID Trigger certificate renewal
|
||||
certs revoke ID Revoke a certificate
|
||||
|
||||
agents list List agents
|
||||
agents get ID Get agent details
|
||||
|
||||
jobs list List jobs
|
||||
jobs get ID Get job details
|
||||
jobs cancel ID Cancel a pending job
|
||||
|
||||
import FILE Bulk import certificates from PEM file(s)
|
||||
|
||||
status Show server health + summary stats
|
||||
version Show CLI version
|
||||
|
||||
Examples:
|
||||
certctl-cli --server http://localhost:8443 --api-key mykey certs list
|
||||
certctl-cli certs renew mc-prod --format json
|
||||
certctl-cli import certs.pem
|
||||
`)
|
||||
}
|
||||
|
||||
serverURL := fs.String("server", os.Getenv("CERTCTL_SERVER_URL"), "certctl server URL (env: CERTCTL_SERVER_URL)")
|
||||
if *serverURL == "" {
|
||||
*serverURL = "http://localhost:8443"
|
||||
}
|
||||
|
||||
apiKey := fs.String("api-key", os.Getenv("CERTCTL_API_KEY"), "API key for authentication (env: CERTCTL_API_KEY)")
|
||||
format := fs.String("format", "table", "Output format: table, json")
|
||||
|
||||
fs.Parse(os.Args[1:])
|
||||
|
||||
args := fs.Args()
|
||||
if len(args) == 0 {
|
||||
fs.Usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Create client
|
||||
client := cli.NewClient(*serverURL, *apiKey, *format)
|
||||
|
||||
// Dispatch to appropriate command
|
||||
command := args[0]
|
||||
cmdArgs := args[1:]
|
||||
|
||||
var err error
|
||||
switch command {
|
||||
case "certs":
|
||||
err = handleCerts(client, cmdArgs)
|
||||
case "agents":
|
||||
err = handleAgents(client, cmdArgs)
|
||||
case "jobs":
|
||||
err = handleJobs(client, cmdArgs)
|
||||
case "import":
|
||||
err = handleImport(client, cmdArgs)
|
||||
case "status":
|
||||
err = handleStatus(client)
|
||||
case "version":
|
||||
fmt.Println("certctl-cli version 0.1.0")
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "unknown command: %s\n", command)
|
||||
fs.Usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func handleCerts(client *cli.Client, args []string) error {
|
||||
if len(args) == 0 {
|
||||
fmt.Fprintf(os.Stderr, "usage: certs <list|get|renew|revoke> [options]\n")
|
||||
return nil
|
||||
}
|
||||
|
||||
subcommand := args[0]
|
||||
subArgs := args[1:]
|
||||
|
||||
switch subcommand {
|
||||
case "list":
|
||||
return client.ListCertificates(subArgs)
|
||||
case "get":
|
||||
if len(subArgs) == 0 {
|
||||
fmt.Fprintf(os.Stderr, "usage: certs get <id>\n")
|
||||
return nil
|
||||
}
|
||||
return client.GetCertificate(subArgs[0])
|
||||
case "renew":
|
||||
if len(subArgs) == 0 {
|
||||
fmt.Fprintf(os.Stderr, "usage: certs renew <id>\n")
|
||||
return nil
|
||||
}
|
||||
return client.RenewCertificate(subArgs[0])
|
||||
case "revoke":
|
||||
if len(subArgs) == 0 {
|
||||
fmt.Fprintf(os.Stderr, "usage: certs revoke <id> [--reason <reason>]\n")
|
||||
return nil
|
||||
}
|
||||
id := subArgs[0]
|
||||
reason := "unspecified"
|
||||
if len(subArgs) > 2 && subArgs[1] == "--reason" {
|
||||
reason = subArgs[2]
|
||||
}
|
||||
return client.RevokeCertificate(id, reason)
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "unknown subcommand: certs %s\n", subcommand)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func handleAgents(client *cli.Client, args []string) error {
|
||||
if len(args) == 0 {
|
||||
fmt.Fprintf(os.Stderr, "usage: agents <list|get> [options]\n")
|
||||
return nil
|
||||
}
|
||||
|
||||
subcommand := args[0]
|
||||
subArgs := args[1:]
|
||||
|
||||
switch subcommand {
|
||||
case "list":
|
||||
return client.ListAgents(subArgs)
|
||||
case "get":
|
||||
if len(subArgs) == 0 {
|
||||
fmt.Fprintf(os.Stderr, "usage: agents get <id>\n")
|
||||
return nil
|
||||
}
|
||||
return client.GetAgent(subArgs[0])
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "unknown subcommand: agents %s\n", subcommand)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func handleJobs(client *cli.Client, args []string) error {
|
||||
if len(args) == 0 {
|
||||
fmt.Fprintf(os.Stderr, "usage: jobs <list|get|cancel> [options]\n")
|
||||
return nil
|
||||
}
|
||||
|
||||
subcommand := args[0]
|
||||
subArgs := args[1:]
|
||||
|
||||
switch subcommand {
|
||||
case "list":
|
||||
return client.ListJobs(subArgs)
|
||||
case "get":
|
||||
if len(subArgs) == 0 {
|
||||
fmt.Fprintf(os.Stderr, "usage: jobs get <id>\n")
|
||||
return nil
|
||||
}
|
||||
return client.GetJob(subArgs[0])
|
||||
case "cancel":
|
||||
if len(subArgs) == 0 {
|
||||
fmt.Fprintf(os.Stderr, "usage: jobs cancel <id>\n")
|
||||
return nil
|
||||
}
|
||||
return client.CancelJob(subArgs[0])
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "unknown subcommand: jobs %s\n", subcommand)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func handleImport(client *cli.Client, args []string) error {
|
||||
if len(args) == 0 {
|
||||
fmt.Fprintf(os.Stderr, "usage: import <file> [file2 ...]\n")
|
||||
return nil
|
||||
}
|
||||
return client.ImportCertificates(args)
|
||||
}
|
||||
|
||||
// handleStatus runs the "status" command by delegating to client.GetStatus and
// returning its error unchanged.
func handleStatus(client *cli.Client) error {
	return client.GetStatus()
}
|
||||
@@ -0,0 +1,43 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
|
||||
gomcp "github.com/modelcontextprotocol/go-sdk/mcp"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/mcp"
|
||||
)
|
||||
|
||||
// Version is set at build time via -ldflags; it stays "dev" when no value is
// injected.
var Version = "dev"
|
||||
|
||||
func main() {
|
||||
serverURL := os.Getenv("CERTCTL_SERVER_URL")
|
||||
if serverURL == "" {
|
||||
serverURL = "http://localhost:8443"
|
||||
}
|
||||
|
||||
apiKey := os.Getenv("CERTCTL_API_KEY")
|
||||
|
||||
client := mcp.NewClient(serverURL, apiKey)
|
||||
|
||||
server := gomcp.NewServer(&gomcp.Implementation{
|
||||
Name: "certctl",
|
||||
Version: Version,
|
||||
}, nil)
|
||||
|
||||
mcp.RegisterTools(server, client)
|
||||
|
||||
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)
|
||||
defer stop()
|
||||
|
||||
fmt.Fprintf(os.Stderr, "certctl MCP server %s (backend: %s)\n", Version, serverURL)
|
||||
|
||||
if err := server.Run(ctx, &gomcp.StdioTransport{}); err != nil {
|
||||
log.Fatalf("MCP server error: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -16,8 +16,16 @@ import (
|
||||
"github.com/shankar0123/certctl/internal/api/middleware"
|
||||
"github.com/shankar0123/certctl/internal/api/router"
|
||||
"github.com/shankar0123/certctl/internal/config"
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
acmeissuer "github.com/shankar0123/certctl/internal/connector/issuer/acme"
|
||||
"github.com/shankar0123/certctl/internal/connector/issuer/local"
|
||||
opensslissuer "github.com/shankar0123/certctl/internal/connector/issuer/openssl"
|
||||
stepcaissuer "github.com/shankar0123/certctl/internal/connector/issuer/stepca"
|
||||
notifyemail "github.com/shankar0123/certctl/internal/connector/notifier/email"
|
||||
notifyopsgenie "github.com/shankar0123/certctl/internal/connector/notifier/opsgenie"
|
||||
notifypagerduty "github.com/shankar0123/certctl/internal/connector/notifier/pagerduty"
|
||||
notifyslack "github.com/shankar0123/certctl/internal/connector/notifier/slack"
|
||||
notifyteams "github.com/shankar0123/certctl/internal/connector/notifier/teams"
|
||||
"github.com/shankar0123/certctl/internal/repository/postgres"
|
||||
"github.com/shankar0123/certctl/internal/scheduler"
|
||||
"github.com/shankar0123/certctl/internal/service"
|
||||
@@ -37,7 +45,7 @@ func main() {
|
||||
}))
|
||||
|
||||
logger.Info("certctl server starting",
|
||||
"version", "0.1.0",
|
||||
"version", "2.0.9",
|
||||
"server_host", cfg.Server.Host,
|
||||
"server_port", cfg.Server.Port)
|
||||
|
||||
@@ -68,50 +76,193 @@ func main() {
|
||||
policyRepo := postgres.NewPolicyRepository(db)
|
||||
notificationRepo := postgres.NewNotificationRepository(db)
|
||||
renewalPolicyRepo := postgres.NewRenewalPolicyRepository(db)
|
||||
profileRepo := postgres.NewProfileRepository(db)
|
||||
teamRepo := postgres.NewTeamRepository(db)
|
||||
ownerRepo := postgres.NewOwnerRepository(db)
|
||||
logger.Info("initialized all repositories")
|
||||
|
||||
// Initialize Local CA issuer connector
|
||||
// This provides in-memory certificate signing for development, testing, and demo.
|
||||
// The CA is ephemeral (regenerated on restart) and NOT suitable for production.
|
||||
localCA := local.New(nil, logger)
|
||||
// Initialize Local CA issuer connector.
|
||||
// In sub-CA mode (CERTCTL_CA_CERT_PATH + CERTCTL_CA_KEY_PATH set), loads a pre-signed
|
||||
// CA cert+key from disk. All issued certs chain to the upstream root (e.g., ADCS).
|
||||
// Otherwise, generates an ephemeral self-signed CA for development/demo.
|
||||
localCAConfig := &local.Config{}
|
||||
if cfg.CA.CertPath != "" && cfg.CA.KeyPath != "" {
|
||||
localCAConfig.CACertPath = cfg.CA.CertPath
|
||||
localCAConfig.CAKeyPath = cfg.CA.KeyPath
|
||||
logger.Info("Local CA configured in sub-CA mode",
|
||||
"cert_path", cfg.CA.CertPath,
|
||||
"key_path", cfg.CA.KeyPath)
|
||||
} else {
|
||||
logger.Info("Local CA configured in self-signed mode (ephemeral)")
|
||||
}
|
||||
localCA := local.New(localCAConfig, logger)
|
||||
logger.Info("initialized Local CA issuer connector")
|
||||
|
||||
// Initialize ACME issuer connector (for Let's Encrypt, Sectigo, etc.)
|
||||
// The ACME connector is registered but only activated when an issuer record
|
||||
// in the database references it. Configuration comes from the issuer's config JSON.
|
||||
// Initialize ACME issuer connector (for Let's Encrypt, ZeroSSL, Sectigo, Google Trust Services, etc.)
|
||||
// Supports HTTP-01 (default), DNS-01 (for wildcards), and DNS-PERSIST-01 (standing record) challenge types.
|
||||
// EAB (External Account Binding) required by ZeroSSL, Google Trust Services, SSL.com.
|
||||
acmeConnector := acmeissuer.New(&acmeissuer.Config{
|
||||
DirectoryURL: os.Getenv("CERTCTL_ACME_DIRECTORY_URL"),
|
||||
Email: os.Getenv("CERTCTL_ACME_EMAIL"),
|
||||
DirectoryURL: os.Getenv("CERTCTL_ACME_DIRECTORY_URL"),
|
||||
Email: os.Getenv("CERTCTL_ACME_EMAIL"),
|
||||
EABKid: os.Getenv("CERTCTL_ACME_EAB_KID"),
|
||||
EABHmac: os.Getenv("CERTCTL_ACME_EAB_HMAC"),
|
||||
ChallengeType: os.Getenv("CERTCTL_ACME_CHALLENGE_TYPE"),
|
||||
DNSPresentScript: os.Getenv("CERTCTL_ACME_DNS_PRESENT_SCRIPT"),
|
||||
DNSCleanUpScript: os.Getenv("CERTCTL_ACME_DNS_CLEANUP_SCRIPT"),
|
||||
DNSPersistIssuerDomain: os.Getenv("CERTCTL_ACME_DNS_PERSIST_ISSUER_DOMAIN"),
|
||||
}, logger)
|
||||
logger.Info("initialized ACME issuer connector")
|
||||
|
||||
// Initialize step-ca issuer connector (for Smallstep private CA).
|
||||
// Uses the native /sign API with JWK provisioner authentication.
|
||||
stepcaConnector := stepcaissuer.New(&stepcaissuer.Config{
|
||||
CAURL: os.Getenv("CERTCTL_STEPCA_URL"),
|
||||
ProvisionerName: os.Getenv("CERTCTL_STEPCA_PROVISIONER"),
|
||||
ProvisionerKeyPath: os.Getenv("CERTCTL_STEPCA_KEY_PATH"),
|
||||
ProvisionerPassword: os.Getenv("CERTCTL_STEPCA_PASSWORD"),
|
||||
}, logger)
|
||||
logger.Info("initialized step-ca issuer connector")
|
||||
|
||||
// Initialize OpenSSL/Custom CA issuer connector (for script-based CA integrations).
|
||||
// Delegates certificate signing to user-provided scripts.
|
||||
opensslConnector := opensslissuer.New(&opensslissuer.Config{
|
||||
SignScript: os.Getenv("CERTCTL_OPENSSL_SIGN_SCRIPT"),
|
||||
RevokeScript: os.Getenv("CERTCTL_OPENSSL_REVOKE_SCRIPT"),
|
||||
CRLScript: os.Getenv("CERTCTL_OPENSSL_CRL_SCRIPT"),
|
||||
TimeoutSeconds: getEnvIntDefault(os.Getenv("CERTCTL_OPENSSL_TIMEOUT_SECONDS"), 30),
|
||||
}, logger)
|
||||
logger.Info("initialized OpenSSL/Custom CA issuer connector")
|
||||
|
||||
// Build issuer registry: maps issuer IDs (from database) to connector implementations.
|
||||
// "iss-local" matches the seed data issuer ID for the Local CA.
|
||||
// "iss-acme-staging" and "iss-acme-prod" are conventional IDs for ACME issuers.
|
||||
// "iss-stepca" is the step-ca private CA connector.
|
||||
// "iss-openssl" is the custom CA/OpenSSL connector.
|
||||
issuerRegistry := map[string]service.IssuerConnector{
|
||||
"iss-local": service.NewIssuerConnectorAdapter(localCA),
|
||||
"iss-acme-staging": service.NewIssuerConnectorAdapter(acmeConnector),
|
||||
"iss-acme-prod": service.NewIssuerConnectorAdapter(acmeConnector),
|
||||
"iss-stepca": service.NewIssuerConnectorAdapter(stepcaConnector),
|
||||
"iss-openssl": service.NewIssuerConnectorAdapter(opensslConnector),
|
||||
}
|
||||
logger.Info("issuer registry configured", "issuers", len(issuerRegistry))
|
||||
|
||||
// Initialize revocation repository
|
||||
revocationRepo := postgres.NewRevocationRepository(db)
|
||||
|
||||
// Initialize services (following the dependency graph)
|
||||
auditService := service.NewAuditService(auditRepo)
|
||||
policyService := service.NewPolicyService(policyRepo, auditService)
|
||||
certificateService := service.NewCertificateService(certificateRepo, policyService, auditService)
|
||||
notificationService := service.NewNotificationService(notificationRepo, make(map[string]service.Notifier))
|
||||
renewalService := service.NewRenewalService(certificateRepo, jobRepo, renewalPolicyRepo, auditService, notificationService, issuerRegistry, cfg.Keygen.Mode)
|
||||
notifierRegistry := make(map[string]service.Notifier)
|
||||
|
||||
// Wire notifier connectors from config
|
||||
if cfg.Notifiers.SlackWebhookURL != "" {
|
||||
slackNotifier := notifyslack.New(notifyslack.Config{
|
||||
WebhookURL: cfg.Notifiers.SlackWebhookURL,
|
||||
ChannelOverride: cfg.Notifiers.SlackChannel,
|
||||
Username: cfg.Notifiers.SlackUsername,
|
||||
})
|
||||
notifierRegistry["Slack"] = slackNotifier
|
||||
logger.Info("Slack notifier enabled")
|
||||
}
|
||||
if cfg.Notifiers.TeamsWebhookURL != "" {
|
||||
teamsNotifier := notifyteams.New(notifyteams.Config{
|
||||
WebhookURL: cfg.Notifiers.TeamsWebhookURL,
|
||||
})
|
||||
notifierRegistry["Teams"] = teamsNotifier
|
||||
logger.Info("Teams notifier enabled")
|
||||
}
|
||||
if cfg.Notifiers.PagerDutyRoutingKey != "" {
|
||||
pdNotifier := notifypagerduty.New(notifypagerduty.Config{
|
||||
RoutingKey: cfg.Notifiers.PagerDutyRoutingKey,
|
||||
Severity: cfg.Notifiers.PagerDutySeverity,
|
||||
})
|
||||
notifierRegistry["PagerDuty"] = pdNotifier
|
||||
logger.Info("PagerDuty notifier enabled")
|
||||
}
|
||||
if cfg.Notifiers.OpsGenieAPIKey != "" {
|
||||
ogNotifier := notifyopsgenie.New(notifyopsgenie.Config{
|
||||
APIKey: cfg.Notifiers.OpsGenieAPIKey,
|
||||
Priority: cfg.Notifiers.OpsGeniePriority,
|
||||
})
|
||||
notifierRegistry["OpsGenie"] = ogNotifier
|
||||
logger.Info("OpsGenie notifier enabled")
|
||||
}
|
||||
|
||||
// Wire email notifier if SMTP is configured
|
||||
var emailAdapter *notifyemail.NotifierAdapter
|
||||
if cfg.Notifiers.SMTPHost != "" && cfg.Notifiers.SMTPFromAddress != "" {
|
||||
emailConnector := notifyemail.New(&notifyemail.Config{
|
||||
SMTPHost: cfg.Notifiers.SMTPHost,
|
||||
SMTPPort: cfg.Notifiers.SMTPPort,
|
||||
Username: cfg.Notifiers.SMTPUsername,
|
||||
Password: cfg.Notifiers.SMTPPassword,
|
||||
FromAddress: cfg.Notifiers.SMTPFromAddress,
|
||||
UseTLS: cfg.Notifiers.SMTPUseTLS,
|
||||
}, logger)
|
||||
emailAdapter = notifyemail.NewNotifierAdapter(emailConnector)
|
||||
notifierRegistry["Email"] = emailAdapter
|
||||
logger.Info("Email notifier enabled",
|
||||
"smtp_host", cfg.Notifiers.SMTPHost,
|
||||
"smtp_port", cfg.Notifiers.SMTPPort,
|
||||
"from", cfg.Notifiers.SMTPFromAddress)
|
||||
}
|
||||
|
||||
notificationService := service.NewNotificationService(notificationRepo, notifierRegistry)
|
||||
notificationService.SetOwnerRepo(ownerRepo)
|
||||
|
||||
// Create RevocationSvc with its dependencies
|
||||
revocationSvc := service.NewRevocationSvc(certificateRepo, revocationRepo, auditService)
|
||||
revocationSvc.SetIssuerRegistry(issuerRegistry)
|
||||
revocationSvc.SetNotificationService(notificationService)
|
||||
|
||||
// Create CAOperationsSvc with its dependencies
|
||||
caOperationsSvc := service.NewCAOperationsSvc(revocationRepo, certificateRepo, profileRepo)
|
||||
caOperationsSvc.SetIssuerRegistry(issuerRegistry)
|
||||
|
||||
// Wire sub-services into CertificateService
|
||||
certificateService.SetRevocationSvc(revocationSvc)
|
||||
certificateService.SetCAOperationsSvc(caOperationsSvc)
|
||||
certificateService.SetTargetRepo(targetRepo)
|
||||
renewalService := service.NewRenewalService(certificateRepo, jobRepo, renewalPolicyRepo, profileRepo, auditService, notificationService, issuerRegistry, cfg.Keygen.Mode)
|
||||
deploymentService := service.NewDeploymentService(jobRepo, targetRepo, agentRepo, certificateRepo, auditService, notificationService)
|
||||
jobService := service.NewJobService(jobRepo, renewalService, deploymentService, logger)
|
||||
agentService := service.NewAgentService(agentRepo, certificateRepo, jobRepo, targetRepo, auditService, issuerRegistry, renewalService)
|
||||
agentService.SetProfileRepo(profileRepo)
|
||||
issuerService := service.NewIssuerService(issuerRepo, auditService)
|
||||
targetService := service.NewTargetService(targetRepo, auditService)
|
||||
profileService := service.NewProfileService(profileRepo, auditService)
|
||||
teamService := service.NewTeamService(teamRepo, auditService)
|
||||
ownerService := service.NewOwnerService(ownerRepo, auditService)
|
||||
agentGroupRepo := postgres.NewAgentGroupRepository(db)
|
||||
agentGroupService := service.NewAgentGroupService(agentGroupRepo, auditService)
|
||||
discoveryRepo := postgres.NewDiscoveryRepository(db)
|
||||
discoveryService := service.NewDiscoveryService(discoveryRepo, certificateRepo, auditService)
|
||||
networkScanRepo := postgres.NewNetworkScanRepository(db)
|
||||
networkScanService := service.NewNetworkScanService(networkScanRepo, discoveryService, auditService, logger)
|
||||
logger.Info("initialized network scan service")
|
||||
|
||||
// Ensure the sentinel "server-scanner" agent exists for network discovery dedup.
|
||||
// This agent ID is used as the agent_id in discovered_certificates for network-scanned certs.
|
||||
if cfg.NetworkScan.Enabled {
|
||||
sentinelAgent := &domain.Agent{
|
||||
ID: service.SentinelAgentID,
|
||||
Name: "Network Scanner (Server-Side)",
|
||||
Status: domain.AgentStatusOnline,
|
||||
}
|
||||
if err := agentRepo.Create(context.Background(), sentinelAgent); err != nil {
|
||||
// Ignore duplicate key errors (agent already exists)
|
||||
logger.Debug("sentinel agent creation", "status", "exists or created", "id", service.SentinelAgentID)
|
||||
}
|
||||
}
|
||||
|
||||
logger.Info("initialized all services")
|
||||
|
||||
// Initialize stats and metrics services
|
||||
statsService := service.NewStatsService(certificateRepo, jobRepo, agentRepo)
|
||||
logger.Info("initialized stats service")
|
||||
|
||||
// Initialize API handlers
|
||||
certificateHandler := handler.NewCertificateHandler(certificateService)
|
||||
issuerHandler := handler.NewIssuerHandler(issuerService)
|
||||
@@ -119,11 +270,41 @@ func main() {
|
||||
agentHandler := handler.NewAgentHandler(agentService)
|
||||
jobHandler := handler.NewJobHandler(jobService)
|
||||
policyHandler := handler.NewPolicyHandler(policyService)
|
||||
profileHandler := handler.NewProfileHandler(profileService)
|
||||
teamHandler := handler.NewTeamHandler(teamService)
|
||||
ownerHandler := handler.NewOwnerHandler(ownerService)
|
||||
agentGroupHandler := handler.NewAgentGroupHandler(agentGroupService)
|
||||
auditHandler := handler.NewAuditHandler(auditService)
|
||||
notificationHandler := handler.NewNotificationHandler(notificationService)
|
||||
statsHandler := handler.NewStatsHandler(statsService)
|
||||
metricsHandler := handler.NewMetricsHandler(statsService, time.Now())
|
||||
healthHandler := handler.NewHealthHandler(cfg.Auth.Type)
|
||||
discoveryHandler := handler.NewDiscoveryHandler(discoveryService)
|
||||
networkScanHandler := handler.NewNetworkScanHandler(networkScanService)
|
||||
verificationService := service.NewVerificationService(jobRepo, auditService, logger)
|
||||
verificationHandler := handler.NewVerificationHandler(verificationService)
|
||||
exportService := service.NewExportService(certificateRepo, auditService)
|
||||
exportHandler := handler.NewExportHandler(exportService)
|
||||
|
||||
// Initialize digest service (requires email notifier)
|
||||
var digestService *service.DigestService
|
||||
var digestHandler *handler.DigestHandler
|
||||
if cfg.Digest.Enabled && emailAdapter != nil {
|
||||
digestService = service.NewDigestService(
|
||||
statsService, certificateRepo, ownerRepo, emailAdapter, cfg.Digest.Recipients, logger,
|
||||
)
|
||||
digestHandler = handler.NewDigestHandler(digestService)
|
||||
logger.Info("digest service enabled",
|
||||
"interval", cfg.Digest.Interval.String(),
|
||||
"recipients", len(cfg.Digest.Recipients))
|
||||
} else {
|
||||
// Create a no-op digest handler for route registration
|
||||
digestHandler = handler.NewDigestHandler(nil)
|
||||
if cfg.Digest.Enabled && emailAdapter == nil {
|
||||
logger.Warn("digest enabled but SMTP not configured — digest emails will not be sent")
|
||||
}
|
||||
}
|
||||
|
||||
logger.Info("initialized all handlers")
|
||||
|
||||
// Create context with cancellation
|
||||
@@ -136,6 +317,7 @@ func main() {
|
||||
jobService,
|
||||
agentService,
|
||||
notificationService,
|
||||
networkScanService,
|
||||
logger,
|
||||
)
|
||||
|
||||
@@ -144,6 +326,15 @@ func main() {
|
||||
sched.SetJobProcessorInterval(cfg.Scheduler.JobProcessorInterval)
|
||||
sched.SetAgentHealthCheckInterval(cfg.Scheduler.AgentHealthCheckInterval)
|
||||
sched.SetNotificationProcessInterval(cfg.Scheduler.NotificationProcessInterval)
|
||||
if cfg.NetworkScan.Enabled {
|
||||
sched.SetNetworkScanInterval(cfg.NetworkScan.ScanInterval)
|
||||
logger.Info("network scanning enabled", "interval", cfg.NetworkScan.ScanInterval.String())
|
||||
}
|
||||
if digestService != nil {
|
||||
sched.SetDigestService(digestService)
|
||||
sched.SetDigestInterval(cfg.Digest.Interval)
|
||||
logger.Info("digest scheduler enabled", "interval", cfg.Digest.Interval.String())
|
||||
}
|
||||
|
||||
// Start scheduler
|
||||
logger.Info("starting scheduler")
|
||||
@@ -153,19 +344,47 @@ func main() {
|
||||
|
||||
// Build the API router with all handlers
|
||||
apiRouter := router.New()
|
||||
apiRouter.RegisterHandlers(
|
||||
certificateHandler,
|
||||
issuerHandler,
|
||||
targetHandler,
|
||||
agentHandler,
|
||||
jobHandler,
|
||||
policyHandler,
|
||||
teamHandler,
|
||||
ownerHandler,
|
||||
auditHandler,
|
||||
notificationHandler,
|
||||
healthHandler,
|
||||
)
|
||||
apiRouter.RegisterHandlers(router.HandlerRegistry{
|
||||
Certificates: certificateHandler,
|
||||
Issuers: issuerHandler,
|
||||
Targets: targetHandler,
|
||||
Agents: agentHandler,
|
||||
Jobs: jobHandler,
|
||||
Policies: policyHandler,
|
||||
Profiles: profileHandler,
|
||||
Teams: teamHandler,
|
||||
Owners: ownerHandler,
|
||||
AgentGroups: agentGroupHandler,
|
||||
Audit: auditHandler,
|
||||
Notifications: notificationHandler,
|
||||
Stats: statsHandler,
|
||||
Metrics: metricsHandler,
|
||||
Health: healthHandler,
|
||||
Discovery: discoveryHandler,
|
||||
NetworkScan: networkScanHandler,
|
||||
Verification: verificationHandler,
|
||||
Export: exportHandler,
|
||||
Digest: *digestHandler,
|
||||
})
|
||||
// Register EST (RFC 7030) handlers if enabled
|
||||
if cfg.EST.Enabled {
|
||||
issuerConn, ok := issuerRegistry[cfg.EST.IssuerID]
|
||||
if !ok {
|
||||
logger.Error("EST issuer not found in registry", "issuer_id", cfg.EST.IssuerID)
|
||||
os.Exit(1)
|
||||
}
|
||||
estService := service.NewESTService(cfg.EST.IssuerID, issuerConn, auditService, logger)
|
||||
if cfg.EST.ProfileID != "" {
|
||||
estService.SetProfileID(cfg.EST.ProfileID)
|
||||
}
|
||||
estHandler := handler.NewESTHandler(estService)
|
||||
apiRouter.RegisterESTHandlers(estHandler)
|
||||
logger.Info("EST server enabled",
|
||||
"issuer_id", cfg.EST.IssuerID,
|
||||
"profile_id", cfg.EST.ProfileID,
|
||||
"endpoints", "/.well-known/est/{cacerts,simpleenroll,simplereenroll,csrattrs}")
|
||||
}
|
||||
|
||||
logger.Info("registered all API handlers")
|
||||
|
||||
// Build middleware stack
|
||||
@@ -177,12 +396,34 @@ func main() {
|
||||
AllowedOrigins: cfg.CORS.AllowedOrigins,
|
||||
})
|
||||
|
||||
structuredLogger := middleware.NewLogging(logger)
|
||||
|
||||
// Request body size limit middleware — prevents memory exhaustion attacks (CWE-400)
|
||||
bodyLimitMiddleware := middleware.NewBodyLimit(middleware.BodyLimitConfig{
|
||||
MaxBytes: cfg.Server.MaxBodySize,
|
||||
})
|
||||
logger.Info("request body size limit enabled", "max_bytes", cfg.Server.MaxBodySize)
|
||||
|
||||
// API audit log middleware — records every API call to the audit trail
|
||||
auditAdapter := middleware.NewAuditServiceAdapter(
|
||||
func(ctx context.Context, actor string, actorType string, action string, resourceType string, resourceID string, details map[string]interface{}) error {
|
||||
return auditService.RecordEvent(ctx, actor, domain.ActorType(actorType), action, resourceType, resourceID, details)
|
||||
},
|
||||
)
|
||||
auditMiddleware := middleware.NewAuditLog(auditAdapter, middleware.AuditConfig{
|
||||
ExcludePaths: []string{"/health", "/ready"},
|
||||
Logger: logger,
|
||||
})
|
||||
logger.Info("API audit logging enabled (excluding /health, /ready)")
|
||||
|
||||
middlewareStack := []func(http.Handler) http.Handler{
|
||||
middleware.RequestID,
|
||||
middleware.Logging,
|
||||
structuredLogger,
|
||||
middleware.Recovery,
|
||||
bodyLimitMiddleware,
|
||||
corsMiddleware,
|
||||
authMiddleware,
|
||||
auditMiddleware,
|
||||
}
|
||||
|
||||
// Add rate limiter if enabled
|
||||
@@ -193,11 +434,13 @@ func main() {
|
||||
})
|
||||
middlewareStack = []func(http.Handler) http.Handler{
|
||||
middleware.RequestID,
|
||||
middleware.Logging,
|
||||
structuredLogger,
|
||||
middleware.Recovery,
|
||||
bodyLimitMiddleware,
|
||||
rateLimiter,
|
||||
corsMiddleware,
|
||||
authMiddleware,
|
||||
auditMiddleware,
|
||||
}
|
||||
logger.Info("rate limiting enabled", "rps", cfg.RateLimit.RPS, "burst", cfg.RateLimit.BurstSize)
|
||||
}
|
||||
@@ -228,9 +471,10 @@ func main() {
|
||||
fileServer := http.FileServer(http.Dir(webDir))
|
||||
finalHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
path := r.URL.Path
|
||||
// API and health routes go to the API handler
|
||||
// API, health, and EST routes go to the API handler
|
||||
if path == "/health" || path == "/ready" ||
|
||||
(len(path) >= 8 && path[:8] == "/api/v1/") {
|
||||
(len(path) >= 8 && path[:8] == "/api/v1/") ||
|
||||
(len(path) >= 16 && path[:16] == "/.well-known/est") {
|
||||
apiHandler.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
@@ -251,11 +495,12 @@ func main() {
|
||||
// Server configuration
|
||||
addr := net.JoinHostPort(cfg.Server.Host, strconv.Itoa(cfg.Server.Port))
|
||||
httpServer := &http.Server{
|
||||
Addr: addr,
|
||||
Handler: finalHandler,
|
||||
ReadTimeout: 15 * time.Second,
|
||||
WriteTimeout: 15 * time.Second,
|
||||
IdleTimeout: 60 * time.Second,
|
||||
Addr: addr,
|
||||
Handler: finalHandler,
|
||||
ReadTimeout: 15 * time.Second,
|
||||
ReadHeaderTimeout: 5 * time.Second,
|
||||
WriteTimeout: 15 * time.Second,
|
||||
IdleTimeout: 60 * time.Second,
|
||||
}
|
||||
|
||||
// Start HTTP server in background
|
||||
@@ -279,6 +524,12 @@ func main() {
|
||||
|
||||
cancel() // Stop scheduler
|
||||
|
||||
// Wait for in-flight scheduler work to complete (up to 30 seconds)
|
||||
logger.Info("waiting for scheduler to complete in-flight work")
|
||||
if err := sched.WaitForCompletion(30 * time.Second); err != nil {
|
||||
logger.Warn("scheduler work did not complete in time", "error", err)
|
||||
}
|
||||
|
||||
logger.Info("shutting down HTTP server")
|
||||
if err := httpServer.Shutdown(shutdownCtx); err != nil {
|
||||
logger.Error("HTTP server shutdown error", "error", err)
|
||||
@@ -291,3 +542,15 @@ func main() {
|
||||
|
||||
logger.Info("certctl server stopped")
|
||||
}
|
||||
|
||||
// getEnvIntDefault parses s as a base-10 integer and returns the result.
//
// Despite the name, s is the already-fetched value (for example the result
// of os.Getenv), not the name of an environment variable. defaultVal is
// returned when s is empty or is not a valid integer, so a missing or
// malformed setting falls back to the default without reporting an error.
func getEnvIntDefault(s string, defaultVal int) int {
	if s == "" {
		return defaultVal
	}
	val, err := strconv.Atoi(s)
	if err != nil {
		// Malformed value: fall back to the default instead of failing.
		return defaultVal
	}
	return val
}
|
||||
|
||||
@@ -12,8 +12,15 @@ services:
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
- ../migrations/000001_initial_schema.up.sql:/docker-entrypoint-initdb.d/001_schema.sql
|
||||
- ../migrations/seed.sql:/docker-entrypoint-initdb.d/002_seed.sql
|
||||
- ../migrations/seed_demo.sql:/docker-entrypoint-initdb.d/003_seed_demo.sql
|
||||
- ../migrations/000002_agent_metadata.up.sql:/docker-entrypoint-initdb.d/002_agent_metadata.sql
|
||||
- ../migrations/000003_certificate_profiles.up.sql:/docker-entrypoint-initdb.d/003_certificate_profiles.sql
|
||||
- ../migrations/000004_agent_groups.up.sql:/docker-entrypoint-initdb.d/004_agent_groups.sql
|
||||
- ../migrations/000005_revocation.up.sql:/docker-entrypoint-initdb.d/005_revocation.sql
|
||||
- ../migrations/000006_discovery.up.sql:/docker-entrypoint-initdb.d/006_discovery.sql
|
||||
- ../migrations/000007_network_discovery.up.sql:/docker-entrypoint-initdb.d/007_network_discovery.sql
|
||||
- ../migrations/000008_verification.up.sql:/docker-entrypoint-initdb.d/008_verification.sql
|
||||
- ../migrations/seed.sql:/docker-entrypoint-initdb.d/010_seed.sql
|
||||
- ../migrations/seed_demo.sql:/docker-entrypoint-initdb.d/011_seed_demo.sql
|
||||
networks:
|
||||
- certctl-network
|
||||
healthcheck:
|
||||
@@ -39,6 +46,7 @@ services:
|
||||
CERTCTL_LOG_LEVEL: info
|
||||
CERTCTL_AUTH_TYPE: none
|
||||
CERTCTL_KEYGEN_MODE: server # Demo uses server-side keygen; production should use "agent"
|
||||
CERTCTL_NETWORK_SCAN_ENABLED: "true" # Enable network scan GUI with seeded demo targets
|
||||
ports:
|
||||
- "8443:8443"
|
||||
networks:
|
||||
|
||||
@@ -0,0 +1,461 @@
|
||||
# Certctl Helm Chart - Complete Summary
|
||||
|
||||
## Overview
|
||||
|
||||
A production-ready Helm chart for deploying certctl (self-hosted certificate lifecycle management platform) on Kubernetes. The chart provides:
|
||||
|
||||
- High availability support with multi-replica deployments
|
||||
- Persistent PostgreSQL database with automatic schema migration
|
||||
- DaemonSet or Deployment-based agent deployment
|
||||
- Comprehensive security contexts and RBAC
|
||||
- Multiple deployment scenarios (dev, prod, HA, external DB)
|
||||
- Full documentation and examples
|
||||
|
||||
## Chart Metadata
|
||||
|
||||
- **Name**: certctl
|
||||
- **Chart Version**: 0.1.0
|
||||
- **App Version**: 2.1.0
|
||||
- **Type**: application
|
||||
- **License**: BSL-1.1 (converts to Apache 2.0 in 2033)
|
||||
|
||||
## File Structure
|
||||
|
||||
```
|
||||
deploy/helm/
|
||||
├── README.md # Main Helm chart documentation
|
||||
├── DEPLOYMENT_GUIDE.md # Step-by-step deployment guide
|
||||
├── CHART_SUMMARY.md # This file
|
||||
│
|
||||
├── certctl/
|
||||
│ ├── Chart.yaml # Chart metadata
|
||||
│ ├── values.yaml # Default configuration values
|
||||
│ ├── .helmignore # Files to ignore when building chart
|
||||
│ │
|
||||
│ └── templates/
|
||||
│ ├── _helpers.tpl # Helm template helper functions
|
||||
│ ├── NOTES.txt # Post-deployment notes
|
||||
│ │
|
||||
│ ├── server-deployment.yaml # Certctl API server deployment
|
||||
│ ├── server-service.yaml # Server Kubernetes service
|
||||
│ ├── server-configmap.yaml # Server configuration
|
||||
│ ├── server-secret.yaml # Server secrets (API key, DB password, etc)
|
||||
│ │
|
||||
│ ├── postgres-statefulset.yaml # PostgreSQL database statefulset
|
||||
│ ├── postgres-service.yaml # PostgreSQL headless service
|
||||
│ ├── postgres-secret.yaml # Database credentials secret
|
||||
│ │
|
||||
│ ├── agent-daemonset.yaml # Certctl agent daemonset/deployment
|
||||
│ ├── agent-configmap.yaml # Agent configuration
|
||||
│ │
|
||||
│ ├── ingress.yaml # Optional ingress resource
|
||||
│ └── serviceaccount.yaml # ServiceAccount and RBAC
|
||||
│
|
||||
└── examples/
|
||||
├── values-dev.yaml # Development/testing configuration
|
||||
├── values-prod-ha.yaml # Production HA configuration
|
||||
├── values-external-db.yaml # External PostgreSQL (RDS, Cloud SQL)
|
||||
└── values-acme-dns01.yaml # ACME with DNS-01 (Let's Encrypt)
|
||||
```
|
||||
|
||||
## Key Components
|
||||
|
||||
### 1. Server Deployment
|
||||
|
||||
**File**: `templates/server-deployment.yaml`
|
||||
|
||||
- Manages certctl API server instances
|
||||
- Configurable replicas (default: 1)
|
||||
- Health checks (liveness & readiness probes)
|
||||
- Security context: non-root user, read-only filesystem
|
||||
- Resource limits (default: 500m CPU, 512Mi memory)
|
||||
- Automatic restart on failure
|
||||
|
||||
**Values**:
|
||||
```yaml
|
||||
server:
|
||||
replicas: 1
|
||||
port: 8443
|
||||
auth:
|
||||
type: api-key
|
||||
apiKey: "REQUIRED"
|
||||
resources:
|
||||
requests: {cpu: 100m, memory: 128Mi}
|
||||
limits: {cpu: 500m, memory: 512Mi}
|
||||
```
|
||||
|
||||
### 2. PostgreSQL StatefulSet
|
||||
|
||||
**File**: `templates/postgres-statefulset.yaml`
|
||||
|
||||
- Persistent database storage
|
||||
- Automatic schema migrations on startup
|
||||
- Single replica (can be extended with external HA tools)
|
||||
- Health checks via pg_isready
|
||||
- Configurable storage size and class
|
||||
- Security context: non-root user (UID 999)
|
||||
|
||||
**Values**:
|
||||
```yaml
|
||||
postgresql:
|
||||
enabled: true
|
||||
storage:
|
||||
size: 10Gi
|
||||
storageClass: "" # Use default
|
||||
auth:
|
||||
database: certctl
|
||||
username: certctl
|
||||
password: "REQUIRED"
|
||||
```
|
||||
|
||||
### 3. Agent DaemonSet/Deployment
|
||||
|
||||
**File**: `templates/agent-daemonset.yaml`
|
||||
|
||||
- DaemonSet mode: one agent per Kubernetes node
|
||||
- Deployment mode: custom number of agent replicas
|
||||
- Local key storage with secure permissions (0600)
|
||||
- Health checks and automatic restart
|
||||
- Optional certificate discovery from filesystem
|
||||
|
||||
**Values**:
|
||||
```yaml
|
||||
agent:
|
||||
enabled: true
|
||||
kind: DaemonSet # or Deployment
|
||||
replicas: 1 # for Deployment only
|
||||
keyDir: /var/lib/certctl/keys
|
||||
discoveryDirs: "/etc/ssl/certs" # optional
|
||||
```
|
||||
|
||||
### 4. Ingress (Optional)
|
||||
|
||||
**File**: `templates/ingress.yaml`
|
||||
|
||||
- Optional HTTPS ingress
|
||||
- cert-manager integration for automatic TLS
|
||||
- Multiple host support
|
||||
- Path-based routing
|
||||
|
||||
**Values**:
|
||||
```yaml
|
||||
ingress:
|
||||
enabled: false
|
||||
className: nginx
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
hosts:
|
||||
- host: certctl.example.com
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
```
|
||||
|
||||
### 5. ConfigMaps and Secrets
|
||||
|
||||
**Files**:
|
||||
- `server-configmap.yaml` - Non-secret server configuration
|
||||
- `server-secret.yaml` - API key, database URL, SMTP password
|
||||
- `postgres-secret.yaml` - Database credentials
|
||||
- `agent-configmap.yaml` - Agent configuration
|
||||
|
||||
All secrets are base64-encoded and stored in Kubernetes Secrets.
|
||||
|
||||
### 6. ServiceAccount and RBAC
|
||||
|
||||
**File**: `templates/serviceaccount.yaml`
|
||||
|
||||
- Optional ServiceAccount creation
|
||||
- Optional RBAC (ClusterRole, ClusterRoleBinding)
|
||||
- Namespace-scoped by default
|
||||
|
||||
## Deployment Scenarios
|
||||
|
||||
### Development Setup
|
||||
|
||||
Use `examples/values-dev.yaml`:
|
||||
|
||||
```bash
|
||||
helm install certctl certctl/ \
|
||||
--values examples/values-dev.yaml \
|
||||
--set server.auth.apiKey="dev-key" \
|
||||
--set postgresql.auth.password="dev-password"
|
||||
```
|
||||
|
||||
**Features**:
|
||||
- Single server replica
|
||||
- Demo auth (no API key required)
|
||||
- Small database (5Gi)
|
||||
- LoadBalancer service for easy access
|
||||
- Debug logging level
|
||||
|
||||
### Production HA Setup
|
||||
|
||||
Use `examples/values-prod-ha.yaml`:
|
||||
|
||||
```bash
|
||||
helm install certctl certctl/ \
|
||||
--values examples/values-prod-ha.yaml \
|
||||
--set server.auth.apiKey="$(openssl rand -base64 32)" \
|
||||
--set postgresql.auth.password="$(openssl rand -base64 32)"
|
||||
```
|
||||
|
||||
**Features**:
|
||||
- 3 server replicas with pod anti-affinity
|
||||
- Large database storage (100Gi)
|
||||
- Pod disruption budgets
|
||||
- Prometheus monitoring enabled
|
||||
- Production resource limits
|
||||
|
||||
### External PostgreSQL
|
||||
|
||||
Use `examples/values-external-db.yaml`:
|
||||
|
||||
```bash
|
||||
helm install certctl certctl/ \
|
||||
--values examples/values-external-db.yaml \
|
||||
--set postgresql.enabled=false \
|
||||
--set 'server.env.CERTCTL_DATABASE_URL=postgres://...'
|
||||
```
|
||||
|
||||
**Use cases**:
|
||||
- AWS RDS
|
||||
- Google Cloud SQL
|
||||
- Azure Database for PostgreSQL
|
||||
- External self-managed PostgreSQL
|
||||
|
||||
### ACME with DNS-01
|
||||
|
||||
Use `examples/values-acme-dns01.yaml`:
|
||||
|
||||
```bash
|
||||
helm install certctl certctl/ \
|
||||
--values examples/values-acme-dns01.yaml
|
||||
```
|
||||
|
||||
**Enables**:
|
||||
- Automatic certificate issuance from Let's Encrypt
|
||||
- DNS-01 challenge (wildcard support)
|
||||
- Custom DNS provider scripts
|
||||
|
||||
## Configuration Options
|
||||
|
||||
### Server Configuration
|
||||
|
||||
| Option | Default | Description |
|
||||
|--------|---------|-------------|
|
||||
| `server.replicas` | 1 | Number of server replicas |
|
||||
| `server.port` | 8443 | Server port |
|
||||
| `server.auth.type` | api-key | Authentication type |
|
||||
| `server.auth.apiKey` | "" | API key (REQUIRED) |
|
||||
| `server.logging.level` | info | Log level |
|
||||
| `server.logging.format` | json | Log format |
|
||||
|
||||
### PostgreSQL Configuration
|
||||
|
||||
| Option | Default | Description |
|
||||
|--------|---------|-------------|
|
||||
| `postgresql.enabled` | true | Enable internal PostgreSQL |
|
||||
| `postgresql.storage.size` | 10Gi | Database storage size |
|
||||
| `postgresql.storage.storageClass` | "" | Storage class name |
|
||||
| `postgresql.auth.password` | "" | Database password (REQUIRED) |
|
||||
|
||||
### Agent Configuration
|
||||
|
||||
| Option | Default | Description |
|
||||
|--------|---------|-------------|
|
||||
| `agent.enabled` | true | Deploy agents |
|
||||
| `agent.kind` | DaemonSet | DaemonSet or Deployment |
|
||||
| `agent.replicas` | 1 | Replicas (Deployment only) |
|
||||
| `agent.keyDir` | /var/lib/certctl/keys | Key storage directory |
|
||||
|
||||
### Issuer Configuration
|
||||
|
||||
| Option | Default | Description |
|
||||
|--------|---------|-------------|
|
||||
| `server.issuer.local.enabled` | true | Enable Local CA |
|
||||
| `server.issuer.acme.enabled` | false | Enable ACME |
|
||||
| `server.issuer.acme.directoryURL` | "" | ACME directory URL |
|
||||
| `server.issuer.acme.email` | "" | ACME email |
|
||||
| `server.issuer.acme.challengeType` | http-01 | Challenge type |
|
||||
|
||||
See `values.yaml` for complete configuration options.
|
||||
|
||||
## Helm Template Functions
|
||||
|
||||
Defined in `templates/_helpers.tpl`:
|
||||
|
||||
| Function | Purpose |
|
||||
|----------|---------|
|
||||
| `certctl.name` | Chart name |
|
||||
| `certctl.fullname` | Full release name |
|
||||
| `certctl.chart` | Chart name and version |
|
||||
| `certctl.labels` | Common labels |
|
||||
| `certctl.selectorLabels` | Selector labels |
|
||||
| `certctl.serverSelectorLabels` | Server selector labels |
|
||||
| `certctl.agentSelectorLabels` | Agent selector labels |
|
||||
| `certctl.postgresSelectorLabels` | PostgreSQL selector labels |
|
||||
| `certctl.serviceAccountName` | ServiceAccount name |
|
||||
| `certctl.serverImage` | Server image URI |
|
||||
| `certctl.agentImage` | Agent image URI |
|
||||
| `certctl.postgresImage` | PostgreSQL image URI |
|
||||
| `certctl.databaseURL` | Database connection string |
|
||||
| `certctl.serverURL` | Server URL for agents |
|
||||
|
||||
## Security Features
|
||||
|
||||
### Pod Security
|
||||
|
||||
- Non-root users (UID 1000 for app, UID 999 for PostgreSQL)
|
||||
- Read-only root filesystems
|
||||
- No privilege escalation
|
||||
- Dropped capabilities (ALL)
|
||||
- Resource limits to prevent DoS
|
||||
|
||||
### Secrets Management
|
||||
|
||||
- All sensitive data in Kubernetes Secrets
|
||||
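- Base64-encoded in the Secret object (encoding only, not encryption; enable Kubernetes encryption at rest for confidentiality)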
|
||||
- Can be integrated with:
|
||||
- sealed-secrets
|
||||
- external-secrets
|
||||
- Vault
|
||||
- AWS Secrets Manager
|
||||
|
||||
### RBAC
|
||||
|
||||
- ServiceAccount per release
|
||||
- Optional ClusterRole/ClusterRoleBinding
|
||||
- Extensible for custom permissions
|
||||
|
||||
### Network Security
|
||||
|
||||
- Support for Kubernetes NetworkPolicies
|
||||
- Service-to-service communication via internal DNS
|
||||
- Optional Ingress with TLS
|
||||
|
||||
## Monitoring and Observability
|
||||
|
||||
### Health Checks
|
||||
|
||||
- Liveness probes (detect dead containers)
|
||||
- Readiness probes (detect not-ready services)
|
||||
- HTTP endpoints: `/health`, `/ready`
|
||||
|
||||
### Logging
|
||||
|
||||
- Structured JSON logging
|
||||
- Request ID propagation
|
||||
- Configurable log levels (debug, info, warn, error)
|
||||
|
||||
### Metrics
|
||||
|
||||
- Prometheus metrics endpoint: `/api/v1/metrics/prometheus`
|
||||
- Optional ServiceMonitor for Prometheus Operator
|
||||
- Built-in metrics:
|
||||
- Certificate counts by status
|
||||
- Agent counts and status
|
||||
- Job completion/failure rates
|
||||
- Server uptime
|
||||
|
||||
## Installation Quick Reference
|
||||
|
||||
```bash
|
||||
# Development
|
||||
helm install certctl certctl/ \
|
||||
--set server.auth.apiKey=dev \
|
||||
--set postgresql.auth.password=dev
|
||||
|
||||
# Production HA
|
||||
helm install certctl certctl/ \
|
||||
--values examples/values-prod-ha.yaml \
|
||||
--set server.auth.apiKey="$(openssl rand -base64 32)" \
|
||||
--set postgresql.auth.password="$(openssl rand -base64 32)"
|
||||
|
||||
# External database
|
||||
helm install certctl certctl/ \
|
||||
--values examples/values-external-db.yaml \
|
||||
--set postgresql.enabled=false \
|
||||
--set 'server.env.CERTCTL_DATABASE_URL=postgres://...'
|
||||
|
||||
# ACME with Let's Encrypt
|
||||
helm install certctl certctl/ \
|
||||
--set server.issuer.acme.enabled=true \
|
||||
--set server.issuer.acme.directoryURL=https://acme-v02.api.letsencrypt.org/directory
|
||||
|
||||
# Check status
|
||||
kubectl get pods -l app.kubernetes.io/instance=certctl
|
||||
kubectl logs -l app.kubernetes.io/component=server -f
|
||||
|
||||
# Upgrade
|
||||
helm upgrade certctl certctl/ -f new-values.yaml
|
||||
|
||||
# Uninstall
|
||||
helm uninstall certctl
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### 1. Use Secrets Management
|
||||
|
||||
```bash
|
||||
# Use sealed-secrets
|
||||
kubectl create secret generic certctl-secrets \
|
||||
--from-literal=api-key="$(openssl rand -base64 32)" \
|
||||
--dry-run=client -o yaml | kubeseal -f - | kubectl apply -f -
|
||||
```
|
||||
|
||||
### 2. Configure Resource Limits
|
||||
|
||||
Match limits to your cluster capacity:
|
||||
|
||||
```yaml
|
||||
server:
|
||||
resources:
|
||||
requests: {cpu: 250m, memory: 256Mi}
|
||||
limits: {cpu: 1000m, memory: 512Mi}
|
||||
```
|
||||
|
||||
### 3. Enable HA for Production
|
||||
|
||||
```yaml
|
||||
server:
|
||||
replicas: 3
|
||||
podAntiAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution: [...]
|
||||
```
|
||||
|
||||
### 4. Use Persistent Storage
|
||||
|
||||
```yaml
|
||||
postgresql:
|
||||
storage:
|
||||
size: 100Gi
|
||||
storageClass: fast-ssd
|
||||
```
|
||||
|
||||
### 5. Enable Monitoring
|
||||
|
||||
```yaml
|
||||
monitoring:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
```
|
||||
|
||||
## Documentation
|
||||
|
||||
- **README.md** - Complete Helm chart documentation
|
||||
- **DEPLOYMENT_GUIDE.md** - Step-by-step deployment instructions
|
||||
- **values.yaml** - Commented configuration reference
|
||||
|
||||
## Support
|
||||
|
||||
For issues, questions, or contributions:
|
||||
- GitHub: https://github.com/shankar0123/certctl
|
||||
- Documentation: https://github.com/shankar0123/certctl/tree/main/docs
|
||||
|
||||
## License
|
||||
|
||||
BSL-1.1 (Business Source License)
|
||||
Converts to Apache 2.0 on March 28, 2033
|
||||
@@ -0,0 +1,515 @@
|
||||
# Certctl Helm Deployment Guide
|
||||
|
||||
Complete guide for deploying certctl on Kubernetes with Helm.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [Prerequisites](#prerequisites)
|
||||
2. [Installation Methods](#installation-methods)
|
||||
3. [Production Deployment](#production-deployment)
|
||||
4. [Configuration Examples](#configuration-examples)
|
||||
5. [Post-Deployment Setup](#post-deployment-setup)
|
||||
6. [Monitoring and Logging](#monitoring-and-logging)
|
||||
7. [Maintenance](#maintenance)
8. [Troubleshooting](#troubleshooting)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### Required Tools
|
||||
|
||||
```bash
|
||||
# Verify Kubernetes cluster access
|
||||
kubectl cluster-info
|
||||
kubectl get nodes
|
||||
|
||||
# Install Helm (if not already installed)
|
||||
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
|
||||
helm version
|
||||
|
||||
# Verify Helm installation
|
||||
helm repo list
|
||||
```
|
||||
|
||||
### Kubernetes Requirements
|
||||
|
||||
- Kubernetes 1.19 or later
|
||||
- At least 2GB available memory
|
||||
- At least 10GB available storage (for PostgreSQL)
|
||||
- Network policies support (optional, for security)
|
||||
- Ingress controller (nginx, istio, etc.) - optional
|
||||
|
||||
### Create Namespace
|
||||
|
||||
```bash
|
||||
# Create isolated namespace
|
||||
kubectl create namespace certctl
|
||||
|
||||
# Set as default namespace
|
||||
kubectl config set-context --current --namespace=certctl
|
||||
|
||||
# Label for network policies (optional)
|
||||
kubectl label namespace certctl certctl-ns=true
|
||||
```
|
||||
|
||||
## Installation Methods
|
||||
|
||||
### Method 1: Minimal Development Setup
|
||||
|
||||
Perfect for testing and development:
|
||||
|
||||
```bash
|
||||
# Install with minimal configuration
|
||||
helm install certctl certctl/certctl \
|
||||
--namespace certctl \
|
||||
--set server.auth.apiKey="dev-key-change-in-production" \
|
||||
--set postgresql.auth.password="dev-password-change-in-production"
|
||||
|
||||
# Wait for deployment
|
||||
kubectl rollout status deployment/certctl-server
|
||||
kubectl rollout status statefulset/certctl-postgres
|
||||
```
|
||||
|
||||
### Method 2: Production HA Setup
|
||||
|
||||
For production workloads:
|
||||
|
||||
```bash
|
||||
# Generate secure credentials
|
||||
API_KEY=$(openssl rand -base64 32)
|
||||
DB_PASSWORD=$(openssl rand -base64 32)
|
||||
|
||||
# Install with HA configuration
|
||||
helm install certctl certctl/certctl \
|
||||
--namespace certctl \
|
||||
--values deploy/helm/examples/values-prod-ha.yaml \
|
||||
--set server.auth.apiKey="$API_KEY" \
|
||||
--set postgresql.auth.password="$DB_PASSWORD"
|
||||
```
|
||||
|
||||
### Method 3: External PostgreSQL
|
||||
|
||||
Using a managed database service:
|
||||
|
||||
```bash
|
||||
# Install with external database
|
||||
helm install certctl certctl/certctl \
|
||||
--namespace certctl \
|
||||
--values deploy/helm/examples/values-external-db.yaml \
|
||||
--set server.auth.apiKey="$API_KEY" \
|
||||
--set 'server.env.CERTCTL_DATABASE_URL=postgres://user:pass@db.example.com:5432/certctl?sslmode=require'
|
||||
```
|
||||
|
||||
### Method 4: Using Custom values.yaml
|
||||
|
||||
Recommended for GitOps workflows:
|
||||
|
||||
```bash
|
||||
# Create values file (the credentials are written in plain text — keep this file out of version control)
|
||||
cat > /tmp/certctl-values.yaml <<EOF
|
||||
server:
|
||||
auth:
|
||||
apiKey: "$API_KEY"
|
||||
logging:
|
||||
level: info
|
||||
|
||||
postgresql:
|
||||
auth:
|
||||
password: "$DB_PASSWORD"
|
||||
storage:
|
||||
size: 50Gi
|
||||
|
||||
agent:
|
||||
enabled: true
|
||||
kind: DaemonSet
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
className: nginx
|
||||
hosts:
|
||||
- host: certctl.example.com
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
EOF
|
||||
|
||||
# Install using values file
|
||||
helm install certctl certctl/certctl \
|
||||
--namespace certctl \
|
||||
--values /tmp/certctl-values.yaml
|
||||
```
|
||||
|
||||
## Production Deployment
|
||||
|
||||
### Step 1: Prepare Environment
|
||||
|
||||
```bash
|
||||
# Create namespace
|
||||
kubectl create namespace certctl
|
||||
cd deploy/helm
|
||||
|
||||
# Generate credentials
|
||||
API_KEY=$(openssl rand -base64 32)
|
||||
DB_PASSWORD=$(openssl rand -base64 32)
|
||||
|
||||
echo "API Key: $API_KEY"
|
||||
echo "DB Password: $DB_PASSWORD"
|
||||
|
||||
# Save credentials in secure location (e.g., 1Password, Vault, AWS Secrets Manager)
|
||||
```
|
||||
|
||||
### Step 2: Prepare Storage
|
||||
|
||||
```bash
|
||||
# List available storage classes
|
||||
kubectl get storageclass
|
||||
|
||||
# If needed, create a high-performance storage class for production
|
||||
cat <<EOF | kubectl apply -f -
|
||||
apiVersion: storage.k8s.io/v1
|
||||
kind: StorageClass
|
||||
metadata:
|
||||
name: fast-ssd
|
||||
provisioner: ebs.csi.aws.com # For AWS, adjust for your cloud provider
|
||||
parameters:
|
||||
type: gp3
|
||||
iops: "3000"
|
||||
throughput: "125"
|
||||
EOF
|
||||
```
|
||||
|
||||
### Step 3: Set Up TLS with cert-manager
|
||||
|
||||
```bash
|
||||
# Install cert-manager (if not already installed)
|
||||
helm repo add jetstack https://charts.jetstack.io
|
||||
helm repo update
|
||||
helm install cert-manager jetstack/cert-manager \
|
||||
--namespace cert-manager \
|
||||
--create-namespace \
|
||||
--set installCRDs=true
|
||||
|
||||
# Create ClusterIssuer for Let's Encrypt
|
||||
kubectl apply -f - <<EOF
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: letsencrypt-prod
|
||||
spec:
|
||||
acme:
|
||||
server: https://acme-v02.api.letsencrypt.org/directory
|
||||
email: admin@example.com
|
||||
privateKeySecretRef:
|
||||
name: letsencrypt-prod
|
||||
solvers:
|
||||
- http01:
|
||||
ingress:
|
||||
class: nginx
|
||||
EOF
|
||||
```
|
||||
|
||||
### Step 4: Install Certctl
|
||||
|
||||
```bash
|
||||
# Install using HA values
|
||||
helm install certctl certctl/ \
|
||||
--namespace certctl \
|
||||
--values examples/values-prod-ha.yaml \
|
||||
--set server.auth.apiKey="$API_KEY" \
|
||||
--set postgresql.auth.password="$DB_PASSWORD" \
|
||||
--set ingress.annotations."cert-manager\.io/cluster-issuer"=letsencrypt-prod \
|
||||
--set ingress.hosts[0].host=certctl.example.com
|
||||
|
||||
# Verify installation
|
||||
kubectl get all -l app.kubernetes.io/instance=certctl
|
||||
```
|
||||
|
||||
### Step 5: Verify Deployment
|
||||
|
||||
```bash
|
||||
# Check pod status
|
||||
kubectl get pods -l app.kubernetes.io/instance=certctl
|
||||
kubectl describe pods -l app.kubernetes.io/instance=certctl
|
||||
|
||||
# Check service status
|
||||
kubectl get svc -l app.kubernetes.io/instance=certctl
|
||||
|
||||
# Check ingress status
|
||||
kubectl get ingress
|
||||
kubectl describe ingress certctl
|
||||
|
||||
# Test API connectivity
|
||||
POD=$(kubectl get pods -l app.kubernetes.io/component=server -o jsonpath='{.items[0].metadata.name}')
|
||||
kubectl port-forward $POD 8443:8443 &
|
||||
curl -H "Authorization: Bearer $API_KEY" http://localhost:8443/health
|
||||
```
|
||||
|
||||
### Step 6: Access the Dashboard
|
||||
|
||||
```bash
|
||||
# Port forward to local machine
|
||||
kubectl port-forward svc/certctl-server 8443:8443 &
|
||||
|
||||
# Or if using Ingress:
|
||||
# Open browser: https://certctl.example.com
|
||||
# Login with API key: $API_KEY
|
||||
```
|
||||
|
||||
## Configuration Examples
|
||||
|
||||
### Example 1: ACME (Let's Encrypt)
|
||||
|
||||
```bash
|
||||
helm install certctl certctl/ \
|
||||
--set server.issuer.acme.enabled=true \
|
||||
--set server.issuer.acme.directoryURL=https://acme-v02.api.letsencrypt.org/directory \
|
||||
--set server.issuer.acme.email=admin@example.com \
|
||||
--set server.issuer.acme.challengeType=http-01
|
||||
```
|
||||
|
||||
### Example 2: DNS-01 (Wildcard Certs)
|
||||
|
||||
Requires DNS scripts ConfigMap:
|
||||
|
||||
```bash
|
||||
# Create DNS scripts ConfigMap
|
||||
kubectl create configmap dns-scripts \
|
||||
--from-file=dns-present.sh=./scripts/dns-present.sh \
|
||||
--from-file=dns-cleanup.sh=./scripts/dns-cleanup.sh
|
||||
|
||||
# Install with DNS-01
|
||||
helm install certctl certctl/ \
|
||||
--set server.issuer.acme.enabled=true \
|
||||
--set server.issuer.acme.challengeType=dns-01 \
|
||||
--values examples/values-acme-dns01.yaml
|
||||
```
|
||||
|
||||
### Example 3: AWS RDS Database
|
||||
|
||||
```bash
|
||||
helm install certctl certctl/ \
|
||||
--set postgresql.enabled=false \
|
||||
--set 'server.env.CERTCTL_DATABASE_URL=postgres://user:password@mydb.c9akciq32.us-east-1.rds.amazonaws.com:5432/certctl?sslmode=require'
|
||||
```
|
||||
|
||||
### Example 4: Multiple Issuers
|
||||
|
||||
```bash
|
||||
helm install certctl certctl/ \
|
||||
--set server.issuer.local.enabled=true \
|
||||
--set server.issuer.acme.enabled=true \
|
||||
--set server.issuer.acme.directoryURL=https://acme-v02.api.letsencrypt.org/directory
|
||||
```
|
||||
|
||||
### Example 5: Email Notifications
|
||||
|
||||
```bash
|
||||
helm install certctl certctl/ \
|
||||
--set server.smtp.enabled=true \
|
||||
--set server.smtp.host=smtp.example.com \
|
||||
--set server.smtp.port=587 \
|
||||
--set server.smtp.username=alerts@example.com \
|
||||
--set server.smtp.password="$SMTP_PASSWORD" \
|
||||
--set server.smtp.fromAddress=certctl@example.com
|
||||
```
|
||||
|
||||
## Post-Deployment Setup
|
||||
|
||||
### 1. Initial Database Setup
|
||||
|
||||
```bash
|
||||
# Check database connection
|
||||
POD=$(kubectl get pods -l app.kubernetes.io/component=postgres -o jsonpath='{.items[0].metadata.name}')
|
||||
|
||||
# Execute psql commands
|
||||
kubectl exec -it $POD -- \
|
||||
psql -U certctl -d certctl -c '\dt'
|
||||
|
||||
# View database status
|
||||
kubectl logs $POD | tail -20
|
||||
```
|
||||
|
||||
### 2. Create Default Certificates
|
||||
|
||||
```bash
|
||||
# Port forward to API
|
||||
kubectl port-forward svc/certctl-server 8443:8443 &
|
||||
|
||||
# Create a test certificate
|
||||
API_KEY="your-api-key"
|
||||
curl -X POST http://localhost:8443/api/v1/certificates \
|
||||
-H "Authorization: Bearer $API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"common_name": "test.example.com",
|
||||
"sans": ["test.example.com", "*.example.com"],
|
||||
"owner": "admin@example.com"
|
||||
}'
|
||||
```
|
||||
|
||||
### 3. Configure Agents
|
||||
|
||||
```bash
|
||||
# Get agent names
|
||||
kubectl get pods -l app.kubernetes.io/component=agent -o wide
|
||||
|
||||
# Check agent connectivity
|
||||
POD=$(kubectl get pods -l app.kubernetes.io/component=agent -o jsonpath='{.items[0].metadata.name}')
|
||||
kubectl logs $POD | grep -i heartbeat
|
||||
```
|
||||
|
||||
### 4. Set Up HTTPS for Web Dashboard
|
||||
|
||||
The Ingress will handle TLS if configured properly:
|
||||
|
||||
```bash
|
||||
# Verify ingress is ready
|
||||
kubectl get ingress
|
||||
kubectl describe ingress certctl
|
||||
|
||||
# Test HTTPS
|
||||
curl https://certctl.example.com/health
|
||||
```
|
||||
|
||||
## Monitoring and Logging
|
||||
|
||||
### 1. View Logs
|
||||
|
||||
```bash
|
||||
# Server logs
|
||||
kubectl logs -l app.kubernetes.io/component=server -f --all-containers=true
|
||||
|
||||
# PostgreSQL logs
|
||||
kubectl logs -l app.kubernetes.io/component=postgres -f
|
||||
|
||||
# Agent logs
|
||||
kubectl logs -l app.kubernetes.io/component=agent -f --all-containers=true
|
||||
|
||||
# Logs from all components
|
||||
kubectl logs -l app.kubernetes.io/instance=certctl -f --all-containers=true
|
||||
```
|
||||
|
||||
### 2. Install Prometheus Monitoring
|
||||
|
||||
```bash
|
||||
# Install Prometheus operator (if not already installed)
|
||||
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
|
||||
helm repo update
|
||||
|
||||
helm install prometheus prometheus-community/kube-prometheus-stack \
|
||||
--namespace monitoring \
|
||||
--create-namespace
|
||||
|
||||
# Certctl will automatically expose metrics if monitoring.enabled=true
|
||||
helm install certctl certctl/ \
|
||||
--set monitoring.enabled=true \
|
||||
--set monitoring.serviceMonitor.enabled=true
|
||||
```
|
||||
|
||||
### 3. Set Up Alerts
|
||||
|
||||
```bash
|
||||
# Create Prometheus alerts
|
||||
cat <<EOF | kubectl apply -f -
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
name: certctl-alerts
|
||||
spec:
|
||||
groups:
|
||||
- name: certctl
|
||||
interval: 30s
|
||||
rules:
|
||||
- alert: CertctlServerDown
|
||||
expr: up{job="certctl-server"} == 0
|
||||
for: 5m
|
||||
annotations:
|
||||
summary: "Certctl server is down"
|
||||
|
||||
- alert: CertificateExpiringSoon
|
||||
expr: certctl_certificate_expiring_soon > 0
|
||||
for: 1h
|
||||
annotations:
|
||||
summary: "{{ \$value }} certificates expiring soon"
|
||||
EOF
|
||||
```
|
||||
|
||||
## Maintenance
|
||||
|
||||
### Scaling
|
||||
|
||||
```bash
|
||||
# Scale server replicas
|
||||
helm upgrade certctl certctl/ \
|
||||
--set server.replicas=5
|
||||
|
||||
# Scale agents (Deployment kind only)
|
||||
helm upgrade certctl certctl/ \
|
||||
--set agent.kind=Deployment \
|
||||
--set agent.replicas=10
|
||||
```
|
||||
|
||||
### Updating
|
||||
|
||||
```bash
|
||||
# Update chart version
|
||||
helm repo update
|
||||
helm upgrade certctl certctl/certctl \
|
||||
--namespace certctl \
|
||||
-f values.yaml
|
||||
|
||||
# Verify update
|
||||
kubectl rollout status deployment/certctl-server
|
||||
kubectl rollout status statefulset/certctl-postgres
|
||||
```
|
||||
|
||||
### Backup and Restore
|
||||
|
||||
```bash
|
||||
# Backup PostgreSQL data
|
||||
kubectl exec -i $(kubectl get pods -l app.kubernetes.io/component=postgres -o jsonpath='{.items[0].metadata.name}') \
|
||||
-- pg_dump -U certctl certctl | gzip > certctl-backup.sql.gz
|
||||
|
||||
# Restore from backup
|
||||
zcat certctl-backup.sql.gz | kubectl exec -i $(kubectl get pods -l app.kubernetes.io/component=postgres -o jsonpath='{.items[0].metadata.name}') \
|
||||
-- psql -U certctl certctl
|
||||
|
||||
# Backup PVC data
|
||||
kubectl get pvc
|
||||
kubectl exec -i $(kubectl get pods -l app.kubernetes.io/component=postgres -o jsonpath='{.items[0].metadata.name}') \
|
||||
-- tar czf - /var/lib/postgresql/data > certctl-data-backup.tar.gz
|
||||
```
|
||||
|
||||
### Uninstall
|
||||
|
||||
```bash
|
||||
# Remove Helm release (keeps PVCs by default)
|
||||
helm uninstall certctl --namespace certctl
|
||||
|
||||
# Delete PVCs if needed
|
||||
kubectl delete pvc --all -n certctl
|
||||
|
||||
# Delete namespace
|
||||
kubectl delete namespace certctl
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
See [README.md](README.md#troubleshooting) for detailed troubleshooting steps.
|
||||
|
||||
Common commands:
|
||||
|
||||
```bash
|
||||
# Get all resources
|
||||
kubectl get all -n certctl
|
||||
|
||||
# Describe pod for events
|
||||
kubectl describe pod <pod-name> -n certctl
|
||||
|
||||
# Stream logs
|
||||
kubectl logs -f <pod-name> -n certctl
|
||||
|
||||
# Execute commands in pod
|
||||
kubectl exec -it <pod-name> -n certctl -- /bin/sh
|
||||
|
||||
# Check events
|
||||
kubectl get events -n certctl --sort-by='.lastTimestamp'
|
||||
```
|
||||
@@ -0,0 +1,234 @@
|
||||
# Certctl Helm Chart - Complete File Index
|
||||
|
||||
## Navigation Guide
|
||||
|
||||
### Getting Started
|
||||
|
||||
1. **Start here**: `INSTALLATION.md` - Quick installation guide with one-liners
|
||||
2. **Full reference**: `README.md` - Complete Helm chart documentation
|
||||
3. **Detailed guide**: `DEPLOYMENT_GUIDE.md` - Step-by-step deployment walkthrough
|
||||
4. **Architecture**: `CHART_SUMMARY.md` - Technical overview and design
|
||||
|
||||
### Chart Directory Structure
|
||||
|
||||
```
|
||||
deploy/helm/
|
||||
│
|
||||
├── README.md Main documentation (15 KB)
|
||||
├── DEPLOYMENT_GUIDE.md Step-by-step guide (12 KB)
|
||||
├── CHART_SUMMARY.md Architecture & design (13 KB)
|
||||
├── INSTALLATION.md Quick start (2.2 KB)
|
||||
├── INDEX.md This file
|
||||
│
|
||||
├── certctl/ Helm chart package
|
||||
│ ├── Chart.yaml Chart metadata
|
||||
│ ├── values.yaml Default configuration (11 KB)
|
||||
│ ├── .helmignore Build ignore patterns
|
||||
│ │
|
||||
│ └── templates/ 13 Kubernetes resource templates
|
||||
│ ├── _helpers.tpl Helper functions
|
||||
│ ├── NOTES.txt Post-install notes
|
||||
│ ├── server-deployment.yaml API server
|
||||
│ ├── server-service.yaml Server networking
|
||||
│ ├── server-configmap.yaml Server configuration
|
||||
│ ├── server-secret.yaml Server secrets
|
||||
│ ├── postgres-statefulset.yaml Database
|
||||
│ ├── postgres-service.yaml Database networking
|
||||
│ ├── postgres-secret.yaml Database secrets
|
||||
│ ├── agent-daemonset.yaml Agents (DaemonSet/Deployment)
|
||||
│ ├── agent-configmap.yaml Agent configuration
|
||||
│ ├── ingress.yaml Optional HTTPS ingress
|
||||
│ └── serviceaccount.yaml RBAC resources
|
||||
│
|
||||
└── examples/ Example configurations
|
||||
├── values-dev.yaml Development setup
|
||||
├── values-prod-ha.yaml Production HA setup
|
||||
├── values-external-db.yaml External PostgreSQL
|
||||
└── values-acme-dns01.yaml ACME DNS-01 configuration
|
||||
```
|
||||
|
||||
## File Descriptions
|
||||
|
||||
### Documentation Files
|
||||
|
||||
| File | Purpose | Size |
|
||||
|------|---------|------|
|
||||
| `README.md` | Complete Helm chart documentation, configuration reference, security considerations | 15 KB |
|
||||
| `DEPLOYMENT_GUIDE.md` | Step-by-step installation instructions, production setup, troubleshooting | 12 KB |
|
||||
| `CHART_SUMMARY.md` | Technical overview, architecture, features, best practices | 13 KB |
|
||||
| `INSTALLATION.md` | Quick start guide, one-liner commands, verification steps | 2.2 KB |
|
||||
| `INDEX.md` | This file - complete file index and navigation | - |
|
||||
|
||||
### Chart Files
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `Chart.yaml` | Helm chart metadata (name, version, appVersion, license) |
|
||||
| `values.yaml` | Default configuration values with comprehensive comments |
|
||||
| `.helmignore` | Files to ignore when building the chart |
|
||||
|
||||
### Template Files
|
||||
|
||||
| File | Components Created |
|
||||
|------|-------------------|
|
||||
| `_helpers.tpl` | 14 Helm template helper functions |
|
||||
| `NOTES.txt` | Post-installation notes and instructions |
|
||||
| `server-deployment.yaml` | Certctl API server deployment (1-N replicas) |
|
||||
| `server-service.yaml` | Service exposing the server |
|
||||
| `server-configmap.yaml` | Non-secret server configuration |
|
||||
| `server-secret.yaml` | Secrets (API key, DB password, SMTP) |
|
||||
| `postgres-statefulset.yaml` | PostgreSQL database with persistent storage |
|
||||
| `postgres-service.yaml` | Headless service for PostgreSQL |
|
||||
| `postgres-secret.yaml` | Database credentials |
|
||||
| `agent-daemonset.yaml` | Certctl agents (DaemonSet or Deployment) |
|
||||
| `agent-configmap.yaml` | Agent configuration |
|
||||
| `ingress.yaml` | Optional HTTPS ingress resource |
|
||||
| `serviceaccount.yaml` | ServiceAccount and RBAC resources |
|
||||
|
||||
### Example Configuration Files
|
||||
|
||||
| File | Use Case | Features |
|
||||
|------|----------|----------|
|
||||
| `values-dev.yaml` | Development/testing | Single replica, debug logging, LoadBalancer, no auth |
|
||||
| `values-prod-ha.yaml` | Production HA | 3 replicas, pod anti-affinity, monitoring, large storage |
|
||||
| `values-external-db.yaml` | External PostgreSQL | AWS RDS, Cloud SQL, Azure Database, self-managed |
|
||||
| `values-acme-dns01.yaml` | Let's Encrypt | DNS-01 challenges, wildcard certs, custom DNS scripts |
|
||||
|
||||
## Quick Links
|
||||
|
||||
### Installation Commands
|
||||
|
||||
#### Development
|
||||
```bash
|
||||
helm install certctl certctl/ \
|
||||
--set server.auth.type=none \
|
||||
--set postgresql.auth.password=dev
|
||||
```
|
||||
|
||||
#### Production HA
|
||||
```bash
|
||||
helm install certctl certctl/ \
|
||||
--values examples/values-prod-ha.yaml \
|
||||
--set server.auth.apiKey="$(openssl rand -base64 32)" \
|
||||
--set postgresql.auth.password="$(openssl rand -base64 32)"
|
||||
```
|
||||
|
||||
#### External Database
|
||||
```bash
|
||||
helm install certctl certctl/ \
|
||||
--values examples/values-external-db.yaml \
|
||||
--set postgresql.enabled=false \
|
||||
--set 'server.env.CERTCTL_DATABASE_URL=postgres://...'
|
||||
```
|
||||
|
||||
### Verification Commands
|
||||
|
||||
```bash
|
||||
# Check chart syntax
|
||||
helm lint certctl/
|
||||
helm template certctl certctl/
|
||||
|
||||
# Install in cluster
|
||||
helm install certctl certctl/
|
||||
helm status certctl
|
||||
|
||||
# Check pod status
|
||||
kubectl get pods -l app.kubernetes.io/instance=certctl
|
||||
|
||||
# View logs
|
||||
kubectl logs -l app.kubernetes.io/component=server -f
|
||||
```
|
||||
|
||||
## Documentation Organization
|
||||
|
||||
### By User Role
|
||||
|
||||
**DevOps/Platform Engineers**
|
||||
- Start: `INSTALLATION.md`
|
||||
- Deep dive: `DEPLOYMENT_GUIDE.md`
|
||||
- Configuration reference: `README.md`
|
||||
|
||||
**Kubernetes Developers**
|
||||
- Architecture: `CHART_SUMMARY.md`
|
||||
- Configuration: `values.yaml`
|
||||
- Templates: `templates/`
|
||||
|
||||
**Security/SREs**
|
||||
- Security section: `README.md#security-considerations`
|
||||
- RBAC: `templates/serviceaccount.yaml`
|
||||
- Network policies: `DEPLOYMENT_GUIDE.md#create-namespace` (namespace label used for network policies)
|
||||
|
||||
**Database Administrators**
|
||||
- PostgreSQL config: `values.yaml` (postgresql section)
|
||||
- External DB setup: `examples/values-external-db.yaml`
|
||||
- Backup/restore: `DEPLOYMENT_GUIDE.md#backup-and-restore`
|
||||
|
||||
### By Task
|
||||
|
||||
**Getting Started**
|
||||
1. Read: `INSTALLATION.md`
|
||||
2. Install: `helm install certctl certctl/`
|
||||
3. Verify: Run commands in `INSTALLATION.md`
|
||||
|
||||
**Production Deployment**
|
||||
1. Read: `DEPLOYMENT_GUIDE.md`
|
||||
2. Choose: `examples/values-prod-ha.yaml`
|
||||
3. Deploy: Follow step-by-step guide
|
||||
4. Reference: `README.md` for detailed options
|
||||
|
||||
**Troubleshooting**
|
||||
- Common issues: `README.md#troubleshooting`
|
||||
- Detailed guide: `DEPLOYMENT_GUIDE.md#troubleshooting`
|
||||
- Error messages: kubectl logs and events
|
||||
|
||||
**Configuration**
|
||||
- All options: `values.yaml`
|
||||
- Examples: `examples/values-*.yaml`
|
||||
- Detailed docs: `README.md#configuration`
|
||||
|
||||
## Key Features
|
||||
|
||||
### High Availability
|
||||
- Multi-replica server deployment
|
||||
- Pod anti-affinity
|
||||
- StatefulSet for database
|
||||
- Pod disruption budgets
|
||||
|
||||
### Security
|
||||
- Non-root containers
|
||||
- Read-only filesystems
|
||||
- RBAC support
|
||||
- Kubernetes Secrets
|
||||
- Network policies
|
||||
|
||||
### Flexibility
|
||||
- Multiple issuers (Local CA, ACME, step-ca, OpenSSL)
|
||||
- Internal or external PostgreSQL
|
||||
- DaemonSet or Deployment agents
|
||||
- Optional Ingress with TLS
|
||||
- Email notifications
|
||||
|
||||
### Observability
|
||||
- Health checks
|
||||
- Structured logging
|
||||
- Prometheus metrics
|
||||
- ServiceMonitor support
|
||||
|
||||
## Support
|
||||
|
||||
- **GitHub**: https://github.com/shankar0123/certctl
|
||||
- **Issues**: Report on GitHub issues
|
||||
- **Documentation**: All docs are in `deploy/helm/`
|
||||
|
||||
## File Statistics
|
||||
|
||||
- **Total files**: 24
|
||||
- **Documentation**: 4 files (42 KB), not counting this index
|
||||
- **Chart files**: 3 files
|
||||
- **Templates**: 13 files
|
||||
- **Examples**: 4 files
|
||||
- **Total size**: 144 KB
|
||||
|
||||
## License
|
||||
|
||||
All files are covered under the BSL-1.1 license (converts to Apache 2.0 in 2033).
|
||||
@@ -0,0 +1,95 @@
|
||||
# Quick Installation Guide
|
||||
|
||||
## One-Liner Installation
|
||||
|
||||
### Development (no auth)
|
||||
```bash
|
||||
helm install certctl certctl/ \
|
||||
--set server.auth.type=none \
|
||||
--set postgresql.auth.password=dev
|
||||
```
|
||||
|
||||
### Production (with API key)
|
||||
```bash
|
||||
API_KEY=$(openssl rand -base64 32)
|
||||
DB_PASSWORD=$(openssl rand -base64 32)
|
||||
|
||||
helm install certctl certctl/ \
|
||||
--values examples/values-prod-ha.yaml \
|
||||
--set server.auth.apiKey="$API_KEY" \
|
||||
--set postgresql.auth.password="$DB_PASSWORD"
|
||||
```
|
||||
|
||||
## Verify Installation
|
||||
|
||||
```bash
|
||||
# Wait for pods to be ready
|
||||
kubectl rollout status deployment/certctl-server
|
||||
kubectl rollout status statefulset/certctl-postgres
|
||||
|
||||
# Check all components
|
||||
kubectl get pods -l app.kubernetes.io/instance=certctl
|
||||
|
||||
# View server logs
|
||||
kubectl logs -l app.kubernetes.io/component=server -f
|
||||
|
||||
# Access the API
|
||||
kubectl port-forward svc/certctl-server 8443:8443 &
|
||||
curl http://localhost:8443/health
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. **Read Documentation**
|
||||
- `README.md` - Complete reference
|
||||
- `DEPLOYMENT_GUIDE.md` - Step-by-step guide
|
||||
- `CHART_SUMMARY.md` - Architecture overview
|
||||
|
||||
2. **Configure for Your Environment**
|
||||
- Review `examples/` for your deployment scenario
|
||||
- Customize `values.yaml` as needed
|
||||
- Use `helm upgrade` to apply changes
|
||||
|
||||
3. **Set Up Monitoring**
|
||||
- Install Prometheus (optional)
|
||||
- Enable Ingress with HTTPS
|
||||
- Configure email notifications
|
||||
|
||||
4. **Deploy Agents**
|
||||
- Agents deploy automatically as DaemonSet
|
||||
- Verify with: `kubectl get pods -l app.kubernetes.io/component=agent`
|
||||
|
||||
5. **Create Certificates**
|
||||
- Configure issuer connectors (Local CA, ACME, etc.)
|
||||
- Access web dashboard at ingress or port-forward
|
||||
|
||||
## Common Commands
|
||||
|
||||
```bash
|
||||
# List installations
|
||||
helm list
|
||||
|
||||
# View chart values
|
||||
helm values certctl
|
||||
|
||||
# Upgrade chart
|
||||
helm upgrade certctl certctl/ -f new-values.yaml
|
||||
|
||||
# Rollback to previous version
|
||||
helm rollback certctl 1
|
||||
|
||||
# Uninstall chart
|
||||
helm uninstall certctl
|
||||
|
||||
# View deployment history
|
||||
helm history certctl
|
||||
|
||||
# Dry-run installation to see generated YAML
|
||||
helm install certctl certctl/ --dry-run --debug
|
||||
```
|
||||
|
||||
## Support
|
||||
|
||||
- Full documentation in `README.md`
|
||||
- Troubleshooting in `DEPLOYMENT_GUIDE.md`
|
||||
- Issues: https://github.com/shankar0123/certctl
|
||||
@@ -0,0 +1,516 @@
|
||||
# Certctl Helm Chart
|
||||
|
||||
Production-ready Helm chart for deploying certctl (self-hosted certificate lifecycle management platform) on Kubernetes.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [Quick Start](#quick-start)
|
||||
2. [Chart Features](#chart-features)
|
||||
3. [Prerequisites](#prerequisites)
|
||||
4. [Installation](#installation)
|
||||
5. [Configuration](#configuration)
|
||||
6. [Usage Examples](#usage-examples)
|
||||
7. [Upgrading](#upgrading)
|
||||
8. [Uninstalling](#uninstalling)
|
||||
9. [Architecture](#architecture)
|
||||
10. [Security Considerations](#security-considerations)
|
||||
11. [Troubleshooting](#troubleshooting)
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Add the chart repository (when available)
|
||||
helm repo add certctl https://charts.example.com
|
||||
helm repo update
|
||||
|
||||
# Install with default values
|
||||
helm install certctl certctl/certctl \
|
||||
--set server.auth.apiKey="your-secure-api-key" \
|
||||
--set postgresql.auth.password="your-secure-password"
|
||||
|
||||
# Check installation status
|
||||
kubectl get pods -l app.kubernetes.io/instance=certctl
|
||||
```
|
||||
|
||||
## Chart Features
|
||||
|
||||
- **Server Deployment** — certctl control plane with configurable replicas
|
||||
- **PostgreSQL StatefulSet** — Persistent database with automatic schema migration
|
||||
- **Agent DaemonSet or Deployment** — Flexible agent deployment (per-node or custom replicas)
|
||||
- **Ingress Support** — Optional HTTPS ingress with cert-manager integration
|
||||
- **Security Contexts** — Non-root containers, read-only filesystems, minimal capabilities
|
||||
- **Resource Limits** — Configurable CPU and memory requests/limits
|
||||
- **Health Checks** — Liveness and readiness probes on all containers
|
||||
- **ConfigMaps and Secrets** — Centralized configuration management
|
||||
- **Service Account and RBAC** — Optional cluster role bindings
|
||||
- **Pod Disruption Budgets** — HA-ready with configurable disruption budgets
|
||||
- **Monitoring** — Optional Prometheus ServiceMonitor support
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Kubernetes 1.19 or later
|
||||
- Helm 3.0 or later
|
||||
- Optional: cert-manager (for automatic TLS certificate provisioning)
|
||||
- Optional: Prometheus (for metrics scraping)
|
||||
|
||||
## Installation
|
||||
|
||||
### 1. Using Chart from Repository
|
||||
|
||||
```bash
|
||||
helm repo add certctl https://charts.example.com
|
||||
helm repo update
|
||||
helm install certctl certctl/certctl -f my-values.yaml
|
||||
```
|
||||
|
||||
### 2. Using Local Chart
|
||||
|
||||
```bash
|
||||
cd deploy/helm
|
||||
helm install certctl certctl/ \
|
||||
--set server.auth.apiKey="$(openssl rand -base64 32)" \
|
||||
--set postgresql.auth.password="$(openssl rand -base64 32)"
|
||||
```
|
||||
|
||||
### 3. Minimal Production Installation
|
||||
|
||||
```bash
|
||||
helm install certctl certctl/certctl \
|
||||
--namespace certctl \
|
||||
--create-namespace \
|
||||
--set server.auth.apiKey="change-me" \
|
||||
--set postgresql.auth.password="change-me" \
|
||||
--set server.replicas=2 \
|
||||
--set server.resources.requests.cpu=200m \
|
||||
--set server.resources.requests.memory=256Mi \
|
||||
--set ingress.enabled=true \
|
||||
--set ingress.className=nginx \
|
||||
--set ingress.hosts[0].host=certctl.example.com
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
### Server Configuration
|
||||
|
||||
```yaml
|
||||
server:
|
||||
replicas: 1 # Number of server replicas
|
||||
port: 8443 # Service port
|
||||
auth:
|
||||
type: api-key # Authentication type
|
||||
apiKey: "your-api-key" # REQUIRED for production
|
||||
logging:
|
||||
level: info # Log level (debug, info, warn, error)
|
||||
format: json # Output format
|
||||
issuer:
|
||||
local:
|
||||
enabled: true # Enable local CA issuer
|
||||
acme:
|
||||
enabled: false # Enable ACME issuer
|
||||
directoryURL: "" # ACME directory URL
|
||||
email: "" # ACME registration email
|
||||
challengeType: "http-01" # Challenge type (http-01, dns-01, dns-persist-01)
|
||||
```
|
||||
|
||||
### PostgreSQL Configuration
|
||||
|
||||
```yaml
|
||||
postgresql:
|
||||
enabled: true # Deploy the bundled in-cluster PostgreSQL (set false for an external database)
|
||||
auth:
|
||||
database: certctl
|
||||
username: certctl
|
||||
password: "your-password" # REQUIRED
|
||||
storage:
|
||||
size: 10Gi # PVC size
|
||||
storageClass: "" # Use default StorageClass
|
||||
```
|
||||
|
||||
### Agent Configuration
|
||||
|
||||
```yaml
|
||||
agent:
|
||||
enabled: true # Deploy agents
|
||||
kind: DaemonSet # DaemonSet (one per node) or Deployment
|
||||
replicas: 1 # For Deployment kind only
|
||||
discoveryDirs: "" # Comma-separated cert discovery paths
|
||||
nodeSelector: {} # Node selector labels for agent pods
|
||||
```
|
||||
|
||||
### Ingress Configuration
|
||||
|
||||
```yaml
|
||||
ingress:
|
||||
enabled: false
|
||||
className: nginx
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
hosts:
|
||||
- host: certctl.example.com
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- secretName: certctl-tls
|
||||
hosts:
|
||||
- certctl.example.com
|
||||
```
|
||||
|
||||
See `values.yaml` for all available configuration options.
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Example 1: High Availability Setup
|
||||
|
||||
```yaml
|
||||
# ha-values.yaml
|
||||
server:
|
||||
replicas: 3
|
||||
resources:
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 256Mi
|
||||
limits:
|
||||
cpu: 1000m
|
||||
memory: 512Mi
|
||||
|
||||
postgresql:
|
||||
storage:
|
||||
size: 50Gi
|
||||
|
||||
podAntiAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
- labelSelector:
|
||||
matchExpressions:
|
||||
- key: app.kubernetes.io/component
|
||||
operator: In
|
||||
values: [server]
|
||||
topologyKey: kubernetes.io/hostname
|
||||
```
|
||||
|
||||
Deploy with:
|
||||
```bash
|
||||
helm install certctl certctl/certctl -f ha-values.yaml
|
||||
```
|
||||
|
||||
### Example 2: External PostgreSQL Database
|
||||
|
||||
```yaml
|
||||
# external-db-values.yaml
|
||||
postgresql:
|
||||
enabled: false
|
||||
|
||||
server:
|
||||
env:
|
||||
CERTCTL_DATABASE_URL: "postgres://user:password@rds.example.com:5432/certctl?sslmode=require"
|
||||
```
|
||||
|
||||
Deploy with:
|
||||
```bash
|
||||
helm install certctl certctl/certctl -f external-db-values.yaml
|
||||
```
|
||||
|
||||
### Example 3: ACME + Let's Encrypt
|
||||
|
||||
```yaml
|
||||
# acme-values.yaml
|
||||
server:
|
||||
issuer:
|
||||
acme:
|
||||
enabled: true
|
||||
directoryURL: https://acme-v02.api.letsencrypt.org/directory
|
||||
email: admin@example.com
|
||||
challengeType: dns-01
|
||||
dnsPresentScript: /scripts/dns-present.sh
|
||||
dnsCleanupScript: /scripts/dns-cleanup.sh
|
||||
dnsPropagationWait: 30s
|
||||
```
|
||||
|
||||
### Example 4: Notifications via Slack + SMTP
|
||||
|
||||
```yaml
|
||||
# notifications-values.yaml
|
||||
server:
|
||||
smtp:
|
||||
enabled: true
|
||||
host: smtp.example.com
|
||||
port: 587
|
||||
username: certctl@example.com
|
||||
password: "smtp-password"
|
||||
fromAddress: certctl@example.com
|
||||
useTLS: true
|
||||
|
||||
notifiers:
|
||||
slack:
|
||||
enabled: true
|
||||
webhookUrl: https://hooks.slack.com/services/YOUR/WEBHOOK/URL
|
||||
channel: "#certificates"
|
||||
```
|
||||
|
||||
## Upgrading
|
||||
|
||||
```bash
|
||||
# Update chart repository
|
||||
helm repo update
|
||||
|
||||
# Upgrade release
|
||||
helm upgrade certctl certctl/certctl -f values.yaml
|
||||
|
||||
# View upgrade history
|
||||
helm history certctl
|
||||
|
||||
# Roll back to a specific revision (here revision 1; omit the number to roll back to the previous release)
|
||||
helm rollback certctl 1
|
||||
```
|
||||
|
||||
## Uninstalling
|
||||
|
||||
```bash
|
||||
# Delete the release (keeps data by default)
|
||||
helm uninstall certctl
|
||||
|
||||
# Also delete persistent data
|
||||
kubectl delete pvc -l app.kubernetes.io/instance=certctl
|
||||
|
||||
# Delete namespace
|
||||
kubectl delete namespace certctl
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
### Components
|
||||
|
||||
```
|
||||
┌──────────────────────────────────────────────────────────────┐
|
||||
│ Kubernetes Cluster │
|
||||
├──────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌─────────────────┐ ┌──────────────────┐ │
|
||||
│ │ Ingress/LB │ │ Agent Pod 1 │ │
|
||||
│ │ (optional) │ │ (DaemonSet) │ │
|
||||
│ └────────┬────────┘ └──────────────────┘ │
|
||||
│ │ │
|
||||
│ ▼ ┌──────────────────┐ │
|
||||
│ ┌─────────────────────────┐ │ Agent Pod 2 │ │
|
||||
│ │ Server Deployment │ │ (DaemonSet) │ │
|
||||
│ │ (1 to N replicas) │ └──────────────────┘ │
|
||||
│ │ - REST API │ │
|
||||
│ │ - Scheduler │ ┌──────────────────┐ │
|
||||
│ │ - UI Dashboard │ │ Agent Pod N │ │
|
||||
│ └────────┬────────────────┘ │ (DaemonSet) │ │
|
||||
│ │ └──────────────────┘ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌──────────────────────────┐ │
|
||||
│ │ PostgreSQL StatefulSet │ │
|
||||
│ │ - Database │ │
|
||||
│ │ - PVC (persistent) │ │
|
||||
│ └──────────────────────────┘ │
|
||||
│ │
|
||||
└──────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Network Communication
|
||||
|
||||
- **Server → PostgreSQL**: Internal cluster DNS (`certctl-postgres:5432`)
|
||||
- **Agent → Server**: Internal cluster DNS (`certctl-server:8443`)
|
||||
- **External → Server**: Via Ingress or Service (ClusterIP/LoadBalancer/NodePort)
|
||||
|
||||
## Security Considerations
|
||||
|
||||
### 1. Secrets Management
|
||||
|
||||
All sensitive data is stored in Kubernetes Secrets:
|
||||
- PostgreSQL credentials
|
||||
- API keys
|
||||
- SMTP passwords
|
||||
- ACME account secrets
|
||||
|
||||
**Best Practices:**
|
||||
- Use sealed-secrets or external-secrets operator
|
||||
- Enable encryption at rest in etcd
|
||||
- Rotate secrets regularly
|
||||
|
||||
```bash
|
||||
# Example: Using sealed-secrets
|
||||
kubectl create secret generic certctl-api-key --from-literal=api-key="$(openssl rand -base64 32)" --dry-run=client -o yaml | kubeseal -f - | kubectl apply -f -
|
||||
```
|
||||
|
||||
### 2. RBAC
|
||||
|
||||
The chart creates minimal RBAC by default:
|
||||
- ServiceAccount per release
|
||||
- ClusterRole (empty, extensible)
|
||||
- ClusterRoleBinding
|
||||
|
||||
**To restrict further:**
|
||||
```yaml
|
||||
rbac:
|
||||
create: true
|
||||
# Add specific rules here
|
||||
```
|
||||
|
||||
### 3. Pod Security
|
||||
|
||||
All containers run with:
|
||||
- Non-root user (UID 1000)
|
||||
- Read-only root filesystem
|
||||
- No privilege escalation
|
||||
- Dropped capabilities (ALL)
|
||||
|
||||
### 4. Network Policies
|
||||
|
||||
Restrict pod-to-pod communication:
|
||||
|
||||
```yaml
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
name: certctl-default-deny
|
||||
spec:
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/instance: certctl
|
||||
policyTypes:
|
||||
- Ingress
|
||||
- Egress
|
||||
ingress:
|
||||
- from:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
name: certctl
|
||||
egress:
|
||||
- to:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
name: certctl
|
||||
- to:
|
||||
- podSelector: {}
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 53 # DNS
|
||||
- protocol: UDP
|
||||
port: 53
|
||||
```
|
||||
|
||||
### 5. TLS/HTTPS
|
||||
|
||||
Enable HTTPS with cert-manager:
|
||||
|
||||
```bash
|
||||
helm install cert-manager jetstack/cert-manager \
|
||||
--namespace cert-manager \
|
||||
--create-namespace \
|
||||
--set installCRDs=true
|
||||
```
|
||||
|
||||
Then configure Ingress with TLS.
|
||||
|
||||
### 6. API Key Security
|
||||
|
||||
For production:
|
||||
1. Generate a strong API key: `openssl rand -base64 32`
|
||||
2. Store securely (Vault, sealed-secrets, etc.)
|
||||
3. Never commit to Git
|
||||
4. Rotate periodically
|
||||
|
||||
```bash
|
||||
# Generate and deploy API key
|
||||
NEW_KEY=$(openssl rand -base64 32)
|
||||
kubectl patch secret certctl-server -p "{\"data\":{\"api-key\":\"$(echo -n $NEW_KEY | base64)\"}}"
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### 1. Pods Not Starting
|
||||
|
||||
```bash
|
||||
# Check pod status
|
||||
kubectl get pods -l app.kubernetes.io/instance=certctl
|
||||
kubectl describe pod <pod-name>
|
||||
kubectl logs <pod-name>
|
||||
```
|
||||
|
||||
### 2. Database Connection Issues
|
||||
|
||||
```bash
|
||||
# Verify PostgreSQL is running
|
||||
kubectl get pods -l app.kubernetes.io/component=postgres
|
||||
kubectl logs -l app.kubernetes.io/component=postgres
|
||||
|
||||
# Test connection from server pod
|
||||
kubectl exec -it <server-pod> -- \
|
||||
psql postgres://certctl:password@certctl-postgres:5432/certctl
|
||||
```
|
||||
|
||||
### 3. Agent Not Connecting
|
||||
|
||||
```bash
|
||||
# Check agent logs
|
||||
kubectl logs -l app.kubernetes.io/component=agent
|
||||
|
||||
# Verify server is reachable
|
||||
kubectl exec -it <agent-pod> -- \
|
||||
wget -q -O - http://certctl-server:8443/health
|
||||
```
|
||||
|
||||
### 4. Persistent Data Loss
|
||||
|
||||
```bash
|
||||
# Check PVC status
|
||||
kubectl get pvc
|
||||
|
||||
# Verify data is being stored
|
||||
kubectl exec -it <postgres-pod> -- \
|
||||
ls -lah /var/lib/postgresql/data
|
||||
```
|
||||
|
||||
### 5. Permission Denied Errors
|
||||
|
||||
The chart runs containers as non-root (UID 1000). If you see permission errors:
|
||||
|
||||
```yaml
|
||||
# Temporarily allow root for debugging
|
||||
server:
|
||||
securityContext:
|
||||
runAsUser: 0 # NOT FOR PRODUCTION
|
||||
```
|
||||
|
||||
### 6. Out of Memory
|
||||
|
||||
Increase resource limits:
|
||||
|
||||
```bash
|
||||
helm upgrade certctl certctl/certctl \
|
||||
--set server.resources.limits.memory=1Gi \
|
||||
--set postgresql.resources.limits.memory=2Gi
|
||||
```
|
||||
|
||||
### 7. Certificate Validation Issues
|
||||
|
||||
For self-signed certificates:
|
||||
|
||||
```bash
|
||||
kubectl exec -it <pod> -- \
|
||||
env CERTCTL_TLS_INSECURE_SKIP_VERIFY=true <command>
|
||||
```
|
||||
|
||||
### Common Issues and Solutions
|
||||
|
||||
| Issue | Solution |
|
||||
|-------|----------|
|
||||
| `ImagePullBackOff` | Update `server.image.repository` to your registry |
|
||||
| `CrashLoopBackOff` | Check logs with `kubectl logs <pod>` |
|
||||
| `Pending` PVC | Check storage class availability |
|
||||
| Connection timeout | Verify network policies and service DNS |
|
||||
| High memory usage | Adjust `postgresql.resources.limits` and `server.resources.limits` |
|
||||
|
||||
## Support and Contributing
|
||||
|
||||
For issues, questions, or contributions, visit:
|
||||
- GitHub: https://github.com/shankar0123/certctl
|
||||
- Documentation: https://github.com/shankar0123/certctl/tree/main/docs
|
||||
|
||||
## License
|
||||
|
||||
BSL-1.1 (converts to Apache 2.0 in 2033)
|
||||
@@ -0,0 +1,31 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob patterns, relative path patterns, and negated
|
||||
# patterns. Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
*.pyo
|
||||
*.pyc
|
||||
.pytest_cache/
|
||||
*.egg-info/
|
||||
dist/
|
||||
build/
|
||||
# IDE
|
||||
.vscode/
|
||||
.idea/
|
||||
*.sublime-project
|
||||
*.sublime-workspace
|
||||
# OS
|
||||
Thumbs.db
|
||||
# Helm
|
||||
Chart.lock
|
||||
@@ -0,0 +1,20 @@
|
||||
apiVersion: v2
|
||||
name: certctl
|
||||
description: Self-hosted certificate lifecycle management platform
|
||||
type: application
|
||||
version: 0.1.0
|
||||
appVersion: "2.1.0"
|
||||
keywords:
|
||||
- certificate
|
||||
- tls
|
||||
- ssl
|
||||
- pki
|
||||
- acme
|
||||
- lifecycle
|
||||
- kubernetes
|
||||
maintainers:
|
||||
- name: certctl
|
||||
home: https://github.com/shankar0123/certctl
|
||||
sources:
|
||||
- https://github.com/shankar0123/certctl
|
||||
license: BSL-1.1
|
||||
@@ -0,0 +1,68 @@
|
||||
1. Get the certctl Server URL by running:
|
||||
{{- if .Values.ingress.enabled }}
|
||||
https://{{ index .Values.ingress.hosts 0 "host" }}
|
||||
{{- else if contains "NodePort" .Values.server.service.type }}
|
||||
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "certctl.fullname" . }}-server)
|
||||
echo http://$NODE_IP:$NODE_PORT
|
||||
{{- else if contains "LoadBalancer" .Values.server.service.type }}
|
||||
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "certctl.fullname" . }}-server -o jsonpath="{.status.loadBalancer.ingress[0].ip}")
|
||||
echo http://$SERVICE_IP:{{ .Values.server.service.port }}
|
||||
{{- else }}
|
||||
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "certctl.name" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=server" -o jsonpath="{.items[0].metadata.name}")
|
||||
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||
{{- end }}
|
||||
|
||||
2. Get the default API key:
|
||||
kubectl get secret --namespace {{ .Release.Namespace }} {{ include "certctl.fullname" . }}-server -o jsonpath="{.data.api-key}" | base64 --decode; echo
|
||||
|
||||
3. Get PostgreSQL connection details:
|
||||
Host: {{ include "certctl.fullname" . }}-postgres.{{ .Release.Namespace }}.svc.cluster.local
|
||||
Port: 5432
|
||||
Database: {{ .Values.postgresql.auth.database }}
|
||||
Username: {{ .Values.postgresql.auth.username }}
|
||||
Password: $(kubectl get secret --namespace {{ .Release.Namespace }} {{ include "certctl.fullname" . }}-postgres -o jsonpath="{.data.password}" | base64 --decode)
|
||||
|
||||
4. Check deployment status:
|
||||
kubectl get pods -n {{ .Release.Namespace }} -l app.kubernetes.io/instance={{ .Release.Name }}
|
||||
|
||||
5. View server logs:
|
||||
kubectl logs -n {{ .Release.Namespace }} -l app.kubernetes.io/name={{ include "certctl.name" . }},app.kubernetes.io/component=server -f
|
||||
|
||||
{{- if .Values.agent.enabled }}
|
||||
|
||||
6. View agent logs:
|
||||
kubectl logs -n {{ .Release.Namespace }} -l app.kubernetes.io/name={{ include "certctl.name" . }},app.kubernetes.io/component=agent -f
|
||||
|
||||
{{- end }}
|
||||
|
||||
IMPORTANT NOTES FOR PRODUCTION:
|
||||
|
||||
1. Update the API key for security:
|
||||
kubectl patch secret {{ include "certctl.fullname" . }}-server -n {{ .Release.Namespace }} \
|
||||
-p '{"data":{"api-key":"'$(echo -n "YOUR_NEW_API_KEY" | base64)'"}}'
|
||||
|
||||
2. Update PostgreSQL password:
|
||||
kubectl patch secret {{ include "certctl.fullname" . }}-postgres -n {{ .Release.Namespace }} \
|
||||
-p '{"data":{"password":"'$(echo -n "YOUR_NEW_PASSWORD" | base64)'"}}'
|
||||
|
||||
3. Configure certificate issuers (ACME, step-ca, etc.) via values.yaml:
|
||||
helm upgrade {{ .Release.Name }} certctl/certctl \
|
||||
--set server.issuer.acme.enabled=true \
|
||||
--set server.issuer.acme.directoryURL=https://acme-v02.api.letsencrypt.org/directory \
|
||||
--set server.issuer.acme.email=admin@example.com
|
||||
|
||||
4. For production with persistent databases and backups:
|
||||
- Use an external PostgreSQL managed service (AWS RDS, Cloud SQL, etc.)
|
||||
- Set postgresql.enabled=false and configure CERTCTL_DATABASE_URL in values
|
||||
|
||||
5. Enable HTTPS/TLS using an Ingress with certificate management:
|
||||
- Configure cert-manager for automatic TLS certificate renewal
|
||||
- Update ingress values with your domain and certificate issuer
|
||||
|
||||
6. Review security contexts and network policies:
|
||||
- All containers run as non-root
|
||||
- Implement network policies to restrict traffic between components
|
||||
- Consider pod security policies or security standards for your cluster
|
||||
@@ -0,0 +1,125 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "certctl.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
*/}}
|
||||
{{- define "certctl.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "certctl.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "certctl.labels" -}}
|
||||
helm.sh/chart: {{ include "certctl.chart" . }}
|
||||
{{ include "certctl.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- with .Values.commonLabels }}
|
||||
{{ toYaml . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Base selector labels shared by every component (server, agent, postgres)
|
||||
*/}}
|
||||
{{- define "certctl.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "certctl.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Server selector labels
|
||||
*/}}
|
||||
{{- define "certctl.serverSelectorLabels" -}}
|
||||
{{ include "certctl.selectorLabels" . }}
|
||||
app.kubernetes.io/component: server
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Agent selector labels
|
||||
*/}}
|
||||
{{- define "certctl.agentSelectorLabels" -}}
|
||||
{{ include "certctl.selectorLabels" . }}
|
||||
app.kubernetes.io/component: agent
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
PostgreSQL selector labels
|
||||
*/}}
|
||||
{{- define "certctl.postgresSelectorLabels" -}}
|
||||
{{ include "certctl.selectorLabels" . }}
|
||||
app.kubernetes.io/component: postgres
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Service account name
|
||||
*/}}
|
||||
{{- define "certctl.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "certctl.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Server image
|
||||
*/}}
|
||||
{{- define "certctl.serverImage" -}}
|
||||
{{- $image := .Values.server.image }}
|
||||
{{- printf "%s:%s" $image.repository (coalesce $image.tag .Chart.AppVersion) }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Agent image
|
||||
*/}}
|
||||
{{- define "certctl.agentImage" -}}
|
||||
{{- $image := .Values.agent.image }}
|
||||
{{- printf "%s:%s" $image.repository (coalesce $image.tag .Chart.AppVersion) }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
PostgreSQL image
|
||||
*/}}
|
||||
{{- define "certctl.postgresImage" -}}
|
||||
{{- $image := .Values.postgresql.image }}
|
||||
{{- printf "%s:%s" $image.repository $image.tag }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Database connection string
|
||||
*/}}
|
||||
{{- define "certctl.databaseURL" -}}
|
||||
postgres://{{ .Values.postgresql.auth.username }}:$(POSTGRES_PASSWORD)@{{ include "certctl.fullname" . }}-postgres:5432/{{ .Values.postgresql.auth.database }}?sslmode=disable
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Server URL (for agents)
|
||||
*/}}
|
||||
{{- define "certctl.serverURL" -}}
|
||||
http://{{ include "certctl.fullname" . }}-server:{{ .Values.server.service.port }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,13 @@
|
||||
{{- if .Values.agent.enabled }}
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ include "certctl.fullname" . }}-agent
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: agent
|
||||
data:
|
||||
{{- if .Values.agent.discoveryDirs }}
|
||||
discovery-dirs: {{ .Values.agent.discoveryDirs | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,162 @@
|
||||
{{- if .Values.agent.enabled }}
|
||||
{{- if eq .Values.agent.kind "DaemonSet" }}
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: {{ include "certctl.fullname" . }}-agent
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: agent
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "certctl.agentSelectorLabels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "certctl.agentSelectorLabels" . | nindent 8 }}
|
||||
spec:
|
||||
serviceAccountName: {{ include "certctl.serviceAccountName" . }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.agent.securityContext | nindent 8 }}
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agent.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agent.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agent.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: agent
|
||||
image: {{ include "certctl.agentImage" . }}
|
||||
imagePullPolicy: {{ .Values.agent.image.pullPolicy }}
|
||||
env:
|
||||
- name: CERTCTL_SERVER_URL
|
||||
value: {{ include "certctl.serverURL" . }}
|
||||
- name: CERTCTL_API_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: api-key
|
||||
- name: CERTCTL_AGENT_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
- name: CERTCTL_KEY_DIR
|
||||
value: {{ .Values.agent.keyDir }}
|
||||
{{- if .Values.agent.discoveryDirs }}
|
||||
- name: CERTCTL_DISCOVERY_DIRS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-agent
|
||||
key: discovery-dirs
|
||||
{{- end }}
|
||||
{{- with .Values.agent.env }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
resources:
|
||||
{{- toYaml .Values.agent.resources | nindent 12 }}
|
||||
volumeMounts:
|
||||
- name: agent-keys
|
||||
mountPath: {{ .Values.agent.keyDir }}
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
volumes:
|
||||
- name: agent-keys
|
||||
emptyDir:
|
||||
sizeLimit: 1Gi
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
{{- else if eq .Values.agent.kind "Deployment" }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "certctl.fullname" . }}-agent
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: agent
|
||||
spec:
|
||||
replicas: {{ .Values.agent.replicas }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "certctl.agentSelectorLabels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "certctl.agentSelectorLabels" . | nindent 8 }}
|
||||
spec:
|
||||
serviceAccountName: {{ include "certctl.serviceAccountName" . }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.agent.securityContext | nindent 8 }}
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agent.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agent.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agent.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: agent
|
||||
image: {{ include "certctl.agentImage" . }}
|
||||
imagePullPolicy: {{ .Values.agent.image.pullPolicy }}
|
||||
env:
|
||||
- name: CERTCTL_SERVER_URL
|
||||
value: {{ include "certctl.serverURL" . }}
|
||||
- name: CERTCTL_API_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: api-key
|
||||
- name: CERTCTL_AGENT_NAME
|
||||
{{- if .Values.agent.name }}
|
||||
value: {{ .Values.agent.name | quote }}
|
||||
{{- else }}
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
{{- end }}
|
||||
- name: CERTCTL_KEY_DIR
|
||||
value: {{ .Values.agent.keyDir }}
|
||||
{{- if .Values.agent.discoveryDirs }}
|
||||
- name: CERTCTL_DISCOVERY_DIRS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-agent
|
||||
key: discovery-dirs
|
||||
{{- end }}
|
||||
{{- with .Values.agent.env }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
resources:
|
||||
{{- toYaml .Values.agent.resources | nindent 12 }}
|
||||
volumeMounts:
|
||||
- name: agent-keys
|
||||
mountPath: {{ .Values.agent.keyDir }}
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
volumes:
|
||||
- name: agent-keys
|
||||
emptyDir:
|
||||
sizeLimit: 1Gi
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,41 @@
|
||||
{{- if .Values.ingress.enabled }}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ include "certctl.fullname" . }}
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- if .Values.ingress.className }}
|
||||
ingressClassName: {{ .Values.ingress.className }}
|
||||
{{- end }}
|
||||
{{- if .Values.ingress.tls }}
|
||||
tls:
|
||||
{{- range .Values.ingress.tls }}
|
||||
- hosts:
|
||||
{{- range .hosts }}
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
secretName: {{ .secretName }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
rules:
|
||||
{{- range .Values.ingress.hosts }}
|
||||
- host: {{ .host | quote }}
|
||||
http:
|
||||
paths:
|
||||
{{- range .paths }}
|
||||
- path: {{ .path }}
|
||||
pathType: {{ .pathType }}
|
||||
backend:
|
||||
service:
|
||||
name: {{ include "certctl.fullname" $ }}-server
|
||||
port:
|
||||
number: {{ $.Values.server.service.port }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,12 @@
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ include "certctl.fullname" . }}-postgres
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: postgres
|
||||
type: Opaque
|
||||
stringData:
|
||||
password: {{ .Values.postgresql.auth.password | default "changeme" | quote }}
|
||||
username: {{ .Values.postgresql.auth.username | quote }}
|
||||
database: {{ .Values.postgresql.auth.database | quote }}
|
||||
@@ -0,0 +1,18 @@
|
||||
{{- if .Values.postgresql.enabled }}
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "certctl.fullname" . }}-postgres
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: postgres
|
||||
spec:
|
||||
clusterIP: None
|
||||
ports:
|
||||
- port: {{ .Values.postgresql.service.port }}
|
||||
targetPort: postgres
|
||||
protocol: TCP
|
||||
name: postgres
|
||||
selector:
|
||||
{{- include "certctl.postgresSelectorLabels" . | nindent 4 }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,79 @@
|
||||
{{- if .Values.postgresql.enabled }}
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: {{ include "certctl.fullname" . }}-postgres
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: postgres
|
||||
spec:
|
||||
serviceName: {{ include "certctl.fullname" . }}-postgres
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "certctl.postgresSelectorLabels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "certctl.postgresSelectorLabels" . | nindent 8 }}
|
||||
spec:
|
||||
securityContext:
|
||||
{{- toYaml .Values.postgresql.securityContext | nindent 8 }}
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: postgres
|
||||
image: {{ include "certctl.postgresImage" . }}
|
||||
imagePullPolicy: {{ .Values.postgresql.image.pullPolicy }}
|
||||
ports:
|
||||
- name: postgres
|
||||
containerPort: 5432
|
||||
protocol: TCP
|
||||
env:
|
||||
- name: POSTGRES_DB
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-postgres
|
||||
key: database
|
||||
- name: POSTGRES_USER
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-postgres
|
||||
key: username
|
||||
- name: POSTGRES_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-postgres
|
||||
key: password
|
||||
- name: POSTGRES_INITDB_ARGS
|
||||
value: "--encoding=UTF8"
|
||||
livenessProbe:
|
||||
{{- toYaml .Values.postgresql.livenessProbe | nindent 12 }}
|
||||
readinessProbe:
|
||||
{{- toYaml .Values.postgresql.readinessProbe | nindent 12 }}
|
||||
resources:
|
||||
{{- toYaml .Values.postgresql.resources | nindent 12 }}
|
||||
volumeMounts:
|
||||
- name: postgres-data
|
||||
mountPath: /var/lib/postgresql/data
|
||||
subPath: postgres
|
||||
- name: postgres-init
|
||||
mountPath: /docker-entrypoint-initdb.d
|
||||
volumes:
|
||||
- name: postgres-init
|
||||
emptyDir: {}
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: postgres-data
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
{{- if .Values.postgresql.storage.storageClass }}
|
||||
storageClassName: {{ .Values.postgresql.storage.storageClass }}
|
||||
{{- end }}
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.postgresql.storage.size }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,36 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: server
|
||||
data:
|
||||
log-level: {{ .Values.server.logging.level | quote }}
|
||||
auth-type: {{ .Values.server.auth.type | quote }}
|
||||
keygen-mode: {{ .Values.server.keygen.mode | quote }}
|
||||
rate-limit-rps: {{ .Values.server.rateLimiting.rps | quote }}
|
||||
rate-limit-burst: {{ .Values.server.rateLimiting.burst | quote }}
|
||||
{{- if .Values.server.cors.origins }}
|
||||
cors-origins: {{ .Values.server.cors.origins | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.server.networkScan.enabled }}
|
||||
network-scan-interval: {{ .Values.server.networkScan.interval | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.server.est.enabled }}
|
||||
est-issuer-id: {{ .Values.server.est.issuerID | quote }}
|
||||
{{- if .Values.server.est.profileID }}
|
||||
est-profile-id: {{ .Values.server.est.profileID | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.server.smtp.enabled }}
|
||||
smtp-host: {{ .Values.server.smtp.host | quote }}
|
||||
smtp-port: {{ .Values.server.smtp.port | quote }}
|
||||
smtp-username: {{ .Values.server.smtp.username | quote }}
|
||||
smtp-from-address: {{ .Values.server.smtp.fromAddress | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.server.issuer.acme.enabled }}
|
||||
acme-directory-url: {{ .Values.server.issuer.acme.directoryURL | quote }}
|
||||
acme-email: {{ .Values.server.issuer.acme.email | quote }}
|
||||
acme-challenge-type: {{ .Values.server.issuer.acme.challengeType | quote }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,196 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: server
|
||||
spec:
|
||||
{{- if gt (int .Values.server.replicas) 1 }}
|
||||
replicas: {{ .Values.server.replicas }}
|
||||
{{- end }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "certctl.serverSelectorLabels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "certctl.serverSelectorLabels" . | nindent 8 }}
|
||||
annotations:
|
||||
checksum/config: {{ include (print $.Template.BasePath "/server-configmap.yaml") . | sha256sum }}
|
||||
checksum/secret: {{ include (print $.Template.BasePath "/server-secret.yaml") . | sha256sum }}
|
||||
spec:
|
||||
serviceAccountName: {{ include "certctl.serviceAccountName" . }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.server.securityContext | nindent 8 }}
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: server
|
||||
image: {{ include "certctl.serverImage" . }}
|
||||
imagePullPolicy: {{ .Values.server.image.pullPolicy }}
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: {{ .Values.server.port }}
|
||||
protocol: TCP
|
||||
env:
|
||||
- name: CERTCTL_SERVER_HOST
|
||||
value: "0.0.0.0"
|
||||
- name: CERTCTL_SERVER_PORT
|
||||
value: "{{ .Values.server.port }}"
|
||||
- name: CERTCTL_DATABASE_URL
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: database-url
|
||||
- name: POSTGRES_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-postgres
|
||||
key: password
|
||||
- name: CERTCTL_LOG_LEVEL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: log-level
|
||||
- name: CERTCTL_LOG_FORMAT
|
||||
value: "json"
|
||||
- name: CERTCTL_AUTH_TYPE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: auth-type
|
||||
{{- if eq .Values.server.auth.type "api-key" }}
|
||||
- name: CERTCTL_AUTH_SECRET
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: api-key
|
||||
{{- end }}
|
||||
- name: CERTCTL_KEYGEN_MODE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: keygen-mode
|
||||
- name: CERTCTL_RATE_LIMIT_RPS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: rate-limit-rps
|
||||
- name: CERTCTL_RATE_LIMIT_BURST
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: rate-limit-burst
|
||||
{{- if .Values.server.cors.origins }}
|
||||
- name: CERTCTL_CORS_ORIGINS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: cors-origins
|
||||
{{- end }}
|
||||
{{- if .Values.server.networkScan.enabled }}
|
||||
- name: CERTCTL_NETWORK_SCAN_ENABLED
|
||||
value: "true"
|
||||
- name: CERTCTL_NETWORK_SCAN_INTERVAL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: network-scan-interval
|
||||
{{- end }}
|
||||
{{- if .Values.server.est.enabled }}
|
||||
- name: CERTCTL_EST_ENABLED
|
||||
value: "true"
|
||||
- name: CERTCTL_EST_ISSUER_ID
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: est-issuer-id
|
||||
{{- if .Values.server.est.profileID }}
|
||||
- name: CERTCTL_EST_PROFILE_ID
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: est-profile-id
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.server.smtp.enabled }}
|
||||
- name: CERTCTL_SMTP_HOST
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: smtp-host
|
||||
- name: CERTCTL_SMTP_PORT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: smtp-port
|
||||
- name: CERTCTL_SMTP_USERNAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: smtp-username
|
||||
- name: CERTCTL_SMTP_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: smtp-password
|
||||
- name: CERTCTL_SMTP_FROM_ADDRESS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: smtp-from-address
|
||||
{{- end }}
|
||||
{{- if .Values.server.issuer.acme.enabled }}
|
||||
- name: CERTCTL_ACME_DIRECTORY_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: acme-directory-url
|
||||
- name: CERTCTL_ACME_EMAIL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: acme-email
|
||||
- name: CERTCTL_ACME_CHALLENGE_TYPE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
key: acme-challenge-type
|
||||
{{- end }}
|
||||
{{- with .Values.server.env }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
livenessProbe:
|
||||
{{- toYaml .Values.server.livenessProbe | nindent 12 }}
|
||||
readinessProbe:
|
||||
{{- toYaml .Values.server.readinessProbe | nindent 12 }}
|
||||
resources:
|
||||
{{- toYaml .Values.server.resources | nindent 12 }}
|
||||
volumeMounts:
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
{{- if .Values.server.volumeMounts }}
|
||||
{{- toYaml .Values.server.volumeMounts | nindent 12 }}
|
||||
{{- end }}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
{{- if .Values.server.volumes }}
|
||||
{{- toYaml .Values.server.volumes | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if .Values.nodeAffinity }}
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
{{- toYaml .Values.nodeAffinity | nindent 10 }}
|
||||
{{- else if .Values.podAntiAffinity }}
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
{{- toYaml .Values.podAntiAffinity | nindent 10 }}
|
||||
{{- else if .Values.podAffinity }}
|
||||
affinity:
|
||||
podAffinity:
|
||||
{{- toYaml .Values.podAffinity | nindent 10 }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: server
|
||||
type: Opaque
|
||||
stringData:
|
||||
database-url: postgres://{{ .Values.postgresql.auth.username }}:$(POSTGRES_PASSWORD)@{{ include "certctl.fullname" . }}-postgres:5432/{{ .Values.postgresql.auth.database }}?sslmode=disable
|
||||
{{- if and (eq .Values.server.auth.type "api-key") .Values.server.auth.apiKey }}
|
||||
api-key: {{ .Values.server.auth.apiKey | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.server.smtp.enabled }}
|
||||
smtp-password: {{ .Values.server.smtp.password | quote }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,20 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: server
|
||||
{{- with .Values.server.service.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
type: {{ .Values.server.service.type }}
|
||||
ports:
|
||||
- port: {{ .Values.server.service.port }}
|
||||
targetPort: http
|
||||
protocol: TCP
|
||||
name: http
|
||||
selector:
|
||||
{{- include "certctl.serverSelectorLabels" . | nindent 4 }}
|
||||
@@ -0,0 +1,37 @@
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: {{ include "certctl.serviceAccountName" . }}
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
{{- with .Values.serviceAccount.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.rbac.create }}
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: {{ include "certctl.fullname" . }}
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
rules: []
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: {{ include "certctl.fullname" . }}
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: {{ include "certctl.fullname" . }}
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: {{ include "certctl.serviceAccountName" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,434 @@
|
||||
# Default values for certctl Helm chart
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# Namespace override (optional)
|
||||
namespace: ""
|
||||
|
||||
# Global configuration
|
||||
commonLabels: {}
|
||||
imagePullSecrets: []
|
||||
nameOverride: ""
|
||||
fullnameOverride: ""
|
||||
|
||||
# ==============================================================================
|
||||
# Certctl Server Configuration
|
||||
# ==============================================================================
|
||||
server:
|
||||
# Number of replicas (for HA deployments)
|
||||
replicas: 1
|
||||
|
||||
# Image configuration
|
||||
image:
|
||||
repository: ghcr.io/shankar0123/certctl
|
||||
tag: "" # defaults to Chart.appVersion
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
# Server port
|
||||
port: 8443
|
||||
|
||||
# Resource requests and limits
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
|
||||
# Pod security context
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
runAsGroup: 1000
|
||||
fsGroup: 1000
|
||||
readOnlyRootFilesystem: true
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
|
||||
# Liveness and readiness probes
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 3
|
||||
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /readyz
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 3
|
||||
failureThreshold: 2
|
||||
|
||||
# Service type (ClusterIP, LoadBalancer, NodePort)
|
||||
service:
|
||||
type: ClusterIP
|
||||
port: 8443
|
||||
annotations: {}
|
||||
|
||||
# Authentication configuration
|
||||
auth:
|
||||
type: api-key # Options: api-key, none (for demo only)
|
||||
apiKey: "" # REQUIRED in production - set via --set or values override
|
||||
|
||||
# Logging configuration
|
||||
logging:
|
||||
level: info # debug, info, warn, error
|
||||
format: json # json or text
|
||||
|
||||
# SMTP configuration for email notifications (optional)
|
||||
smtp:
|
||||
enabled: false
|
||||
host: ""
|
||||
port: 587
|
||||
username: ""
|
||||
password: ""
|
||||
fromAddress: ""
|
||||
useTLS: true
|
||||
|
||||
# Certificate digest (periodic email summary)
|
||||
digest:
|
||||
enabled: false
|
||||
interval: "24h"
|
||||
recipients: []
|
||||
# Example:
|
||||
# - admin@example.com
|
||||
# - ops@example.com
|
||||
|
||||
# Enrollment over Secure Transport (EST) configuration
|
||||
est:
|
||||
enabled: false
|
||||
issuerID: "iss-local"
|
||||
profileID: ""
|
||||
|
||||
# Rate limiting configuration
|
||||
rateLimiting:
|
||||
rps: 100 # Requests per second
|
||||
burst: 200 # Burst capacity
|
||||
|
||||
# Network scanning configuration
|
||||
networkScan:
|
||||
enabled: false
|
||||
interval: "6h"
|
||||
|
||||
# Certificate key generation mode
|
||||
keygen:
|
||||
mode: agent # Options: agent (production), server (demo with warning)
|
||||
|
||||
# CORS configuration
|
||||
cors:
|
||||
origins: "" # Comma-separated list, empty means deny all cross-origin requests
|
||||
|
||||
# Issuer connectors configuration
|
||||
issuer:
|
||||
local:
|
||||
enabled: true
|
||||
# For sub-CA mode, provide these paths:
|
||||
# caCertPath: /path/to/ca.crt
|
||||
# caKeyPath: /path/to/ca.key
|
||||
|
||||
acme:
|
||||
enabled: false
|
||||
directoryURL: ""
|
||||
email: ""
|
||||
challengeType: "http-01" # Options: http-01, dns-01, dns-persist-01
|
||||
# DNS configuration (for dns-01 or dns-persist-01)
|
||||
# dnsPresentScript: /path/to/dns-present.sh
|
||||
# dnsCleanupScript: /path/to/dns-cleanup.sh
|
||||
# dnsPropagationWait: "30s"
|
||||
# dnsPersistIssuerDomain: "validation.example.com"
|
||||
# EAB configuration (for ZeroSSL, Google Trust Services, etc.)
|
||||
# eabKid: ""
|
||||
# eabHmac: ""
|
||||
|
||||
stepca:
|
||||
enabled: false
|
||||
# rootCAPath: /path/to/root_ca.crt
|
||||
# intermediateCAPath: /path/to/intermediate_ca.crt
|
||||
# provisionerName: ""
|
||||
# provisionerPassword: ""
|
||||
|
||||
openssl:
|
||||
enabled: false
|
||||
# signScript: /path/to/sign.sh
|
||||
# revokeScript: /path/to/revoke.sh
|
||||
# crlScript: /path/to/crl.sh
|
||||
# timeoutSeconds: 30
|
||||
|
||||
# Notifier connectors configuration
|
||||
notifiers:
|
||||
slack:
|
||||
enabled: false
|
||||
# webhookUrl: ""
|
||||
# channel: ""
|
||||
# username: ""
|
||||
# iconEmoji: ""
|
||||
|
||||
teams:
|
||||
enabled: false
|
||||
# webhookUrl: ""
|
||||
|
||||
pagerduty:
|
||||
enabled: false
|
||||
# routingKey: ""
|
||||
# severity: warning
|
||||
|
||||
opsgenie:
|
||||
enabled: false
|
||||
# apiKey: ""
|
||||
# priority: P3
|
||||
|
||||
# Additional environment variables
|
||||
# Will be passed as-is to the server container
|
||||
env: {}
|
||||
# Example:
|
||||
# CERTCTL_SCHEDULER_RENEWAL_CHECK_INTERVAL: "1h"
|
||||
# CERTCTL_DATABASE_MAX_CONNS: "25"
|
||||
|
||||
# Additional volume mounts for custom configurations
|
||||
# volumeMounts: []
|
||||
# - name: ca-cert
|
||||
# mountPath: /etc/ssl/certs/ca.crt
|
||||
# subPath: ca.crt
|
||||
|
||||
# Additional volumes
|
||||
# volumes: []
|
||||
# - name: ca-cert
|
||||
# secret:
|
||||
# secretName: ca-cert
|
||||
|
||||
# ==============================================================================
|
||||
# PostgreSQL Configuration
|
||||
# ==============================================================================
|
||||
postgresql:
|
||||
# Enable/disable PostgreSQL (set to false if using external database)
|
||||
enabled: true
|
||||
|
||||
# Image configuration
|
||||
image:
|
||||
repository: postgres
|
||||
tag: "16-alpine"
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
# Authentication
|
||||
auth:
|
||||
database: certctl
|
||||
username: certctl
|
||||
password: "" # REQUIRED - set via --set or values override
|
||||
|
||||
# Storage configuration
|
||||
storage:
|
||||
size: 10Gi
|
||||
storageClass: "" # Uses default StorageClass if empty
|
||||
# deleteOnTermination: false # Keep data on Helm uninstall
|
||||
|
||||
# Resource requests and limits
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 256Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
|
||||
# Pod security context
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999
|
||||
runAsGroup: 999
|
||||
fsGroup: 999
|
||||
|
||||
# Liveness and readiness probes
|
||||
livenessProbe:
|
||||
exec:
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- pg_isready -U certctl -d certctl
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 3
|
||||
|
||||
readinessProbe:
|
||||
exec:
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- pg_isready -U certctl -d certctl
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 3
|
||||
failureThreshold: 2
|
||||
|
||||
# Service configuration
|
||||
service:
|
||||
type: ClusterIP
|
||||
port: 5432
|
||||
|
||||
# PostgreSQL-specific settings
|
||||
postgresqlConfig: {}
|
||||
# Example:
|
||||
# max_connections: "200"
|
||||
# shared_buffers: "256MB"
|
||||
|
||||
# ==============================================================================
|
||||
# Certctl Agent Configuration
|
||||
# ==============================================================================
|
||||
agent:
|
||||
# Enable/disable agent deployment
|
||||
enabled: true
|
||||
|
||||
# Deployment strategy: DaemonSet (recommended) or Deployment
|
||||
kind: DaemonSet # Options: DaemonSet, Deployment
|
||||
|
||||
# Image configuration
|
||||
image:
|
||||
repository: ghcr.io/shankar0123/certctl-agent
|
||||
tag: "" # defaults to Chart.appVersion
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
# Number of replicas (for Deployment kind; ignored for DaemonSet)
|
||||
replicas: 1
|
||||
|
||||
# Resource requests and limits
|
||||
resources:
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 64Mi
|
||||
limits:
|
||||
cpu: 200m
|
||||
memory: 256Mi
|
||||
|
||||
# Pod security context
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
runAsGroup: 1000
|
||||
fsGroup: 1000
|
||||
readOnlyRootFilesystem: true
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
|
||||
# Agent name (can be overridden per pod via StatefulSet ordinals)
|
||||
name: "" # If empty, uses release name
|
||||
|
||||
# Key storage directory
|
||||
keyDir: /var/lib/certctl/keys
|
||||
|
||||
# Certificate discovery directories (comma-separated)
|
||||
discoveryDirs: ""
|
||||
# Example: "/etc/ssl/certs,/etc/pki/tls"
|
||||
|
||||
# Node selector for agent pods (for DaemonSet)
|
||||
nodeSelector: {}
|
||||
# Example:
|
||||
# node-role.kubernetes.io/worker: "true"
|
||||
|
||||
# Tolerations for agent pods
|
||||
tolerations: []
|
||||
# Example:
|
||||
# - key: node-role
|
||||
# operator: Equal
|
||||
# value: worker
|
||||
# effect: NoSchedule
|
||||
|
||||
# Affinity rules
|
||||
affinity: {}
|
||||
|
||||
# Additional environment variables
|
||||
env: {}
|
||||
|
||||
# ==============================================================================
|
||||
# Ingress Configuration
|
||||
# ==============================================================================
|
||||
ingress:
|
||||
enabled: false
|
||||
className: ""
|
||||
annotations: {}
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
hosts:
|
||||
- host: certctl.local
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls: []
|
||||
# - secretName: certctl-tls
|
||||
# hosts:
|
||||
# - certctl.local
|
||||
|
||||
# ==============================================================================
|
||||
# Service Account Configuration
|
||||
# ==============================================================================
|
||||
serviceAccount:
|
||||
create: true
|
||||
annotations: {}
|
||||
name: "" # defaults to release name if empty
|
||||
|
||||
# ==============================================================================
|
||||
# RBAC Configuration
|
||||
# ==============================================================================
|
||||
rbac:
|
||||
create: true
|
||||
|
||||
# ==============================================================================
|
||||
# Pod Disruption Budget (for HA deployments)
|
||||
# ==============================================================================
|
||||
podDisruptionBudget:
|
||||
enabled: false
|
||||
minAvailable: 1
|
||||
# maxUnavailable: 1
|
||||
|
||||
# ==============================================================================
|
||||
# Monitoring Configuration
|
||||
# ==============================================================================
|
||||
monitoring:
|
||||
enabled: false
|
||||
# Prometheus ServiceMonitor
|
||||
serviceMonitor:
|
||||
enabled: false
|
||||
interval: 30s
|
||||
scrapeTimeout: 10s
|
||||
# labels: {}
|
||||
# selector: {}
|
||||
|
||||
# ==============================================================================
|
||||
# Advanced Configuration
|
||||
# ==============================================================================
|
||||
|
||||
# Node affinity for server pods
|
||||
nodeAffinity: {}
|
||||
|
||||
# Pod affinity for server pods
|
||||
podAffinity: {}
|
||||
|
||||
# Pod anti-affinity for server pods (for HA)
|
||||
podAntiAffinity: {}
|
||||
# Example:
|
||||
# podAntiAffinity:
|
||||
# preferredDuringSchedulingIgnoredDuringExecution:
|
||||
# - weight: 100
|
||||
# podAffinityTerm:
|
||||
# labelSelector:
|
||||
# matchExpressions:
|
||||
# - key: app.kubernetes.io/name
|
||||
# operator: In
|
||||
# values:
|
||||
# - certctl
|
||||
# topologyKey: kubernetes.io/hostname
|
||||
|
||||
# Custom labels for all resources
|
||||
customLabels: {}
|
||||
|
||||
# Custom annotations for all resources
|
||||
customAnnotations: {}
|
||||
@@ -0,0 +1,77 @@
|
||||
# Certctl with ACME DNS-01 Challenge (Let's Encrypt)
|
||||
# Enables automatic certificate issuance from Let's Encrypt
|
||||
# using DNS-01 verification (wildcard-capable)
|
||||
|
||||
server:
|
||||
auth:
|
||||
type: api-key
|
||||
apiKey: "CHANGE_ME"
|
||||
|
||||
issuer:
|
||||
local:
|
||||
enabled: true
|
||||
|
||||
acme:
|
||||
enabled: true
|
||||
directoryURL: https://acme-v02.api.letsencrypt.org/directory
|
||||
email: admin@example.com
|
||||
challengeType: dns-01
|
||||
dnsPresentScript: /scripts/dns-present.sh
|
||||
dnsCleanupScript: /scripts/dns-cleanup.sh
|
||||
dnsPropagationWait: 30s
|
||||
# For DNS-PERSIST-01 (standing validation record, no per-renewal updates):
|
||||
# challengeType: dns-persist-01
|
||||
# dnsPersistIssuerDomain: validation.example.com
|
||||
|
||||
# Mount DNS scripts as ConfigMap
|
||||
volumes:
|
||||
- name: dns-scripts
|
||||
configMap:
|
||||
name: dns-scripts
|
||||
defaultMode: 0755
|
||||
|
||||
volumeMounts:
|
||||
- name: dns-scripts
|
||||
mountPath: /scripts
|
||||
readOnly: true
|
||||
|
||||
postgresql:
|
||||
enabled: true
|
||||
storage:
|
||||
size: 20Gi
|
||||
|
||||
agent:
|
||||
enabled: true
|
||||
kind: DaemonSet
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
className: nginx
|
||||
hosts:
|
||||
- host: certctl.example.com
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
|
||||
---
|
||||
# You'll need to create the DNS scripts ConfigMap separately:
|
||||
#
|
||||
# kubectl create configmap dns-scripts \
|
||||
# --from-file=dns-present.sh=./scripts/dns-present.sh \
|
||||
# --from-file=dns-cleanup.sh=./scripts/dns-cleanup.sh
|
||||
#
|
||||
# Example dns-present.sh (Cloudflare):
|
||||
# #!/bin/bash
|
||||
# DOMAIN=$1
|
||||
# TOKEN=$2
|
||||
#
|
||||
# curl -X POST "https://api.cloudflare.com/client/v4/zones/{zone_id}/dns_records" \
|
||||
# -H "Authorization: Bearer ${CLOUDFLARE_API_TOKEN}" \
|
||||
# -d "{\"type\":\"TXT\",\"name\":\"_acme-challenge.${DOMAIN}\",\"content\":\"${TOKEN}\"}"
|
||||
#
|
||||
# Example dns-cleanup.sh (Cloudflare):
|
||||
# #!/bin/bash
|
||||
# DOMAIN=$1
|
||||
#
|
||||
# curl -X DELETE "https://api.cloudflare.com/client/v4/zones/{zone_id}/dns_records/{record_id}" \
|
||||
# -H "Authorization: Bearer ${CLOUDFLARE_API_TOKEN}"
|
||||
@@ -0,0 +1,99 @@
|
||||
# Certctl Development Configuration
|
||||
# Lightweight setup for development and testing
|
||||
# - Single server replica
|
||||
# - Small PostgreSQL storage
|
||||
# - Minimal resource limits
|
||||
# - No ingress or monitoring
|
||||
# - Demo auth mode (no API key required)
|
||||
|
||||
server:
|
||||
replicas: 1
|
||||
|
||||
image:
|
||||
repository: ghcr.io/shankar0123/certctl
|
||||
pullPolicy: IfNotPresent # tag is unset, so the image tag defaults to Chart.appVersion
|
||||
|
||||
port: 8443
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 64Mi
|
||||
limits:
|
||||
cpu: 200m
|
||||
memory: 256Mi
|
||||
|
||||
auth:
|
||||
type: none # Demo mode - no authentication
|
||||
|
||||
logging:
|
||||
level: debug
|
||||
format: json
|
||||
|
||||
service:
|
||||
type: LoadBalancer # Easy external access for dev
|
||||
|
||||
issuer:
|
||||
local:
|
||||
enabled: true
|
||||
|
||||
rateLimiting:
|
||||
rps: 100
|
||||
burst: 200
|
||||
|
||||
postgresql:
|
||||
enabled: true
|
||||
|
||||
image:
|
||||
repository: postgres
|
||||
tag: "16-alpine"
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
auth:
|
||||
database: certctl
|
||||
username: certctl
|
||||
password: "dev-password-change-me"
|
||||
|
||||
storage:
|
||||
size: 5Gi
|
||||
storageClass: "" # Use default storage class
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
cpu: 200m
|
||||
memory: 256Mi
|
||||
|
||||
agent:
|
||||
enabled: true
|
||||
kind: Deployment
|
||||
replicas: 1
|
||||
|
||||
image:
|
||||
repository: ghcr.io/shankar0123/certctl-agent
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 25m
|
||||
memory: 32Mi
|
||||
limits:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
|
||||
ingress:
|
||||
enabled: false
|
||||
|
||||
serviceAccount:
|
||||
create: true
|
||||
|
||||
rbac:
|
||||
create: true
|
||||
|
||||
monitoring:
|
||||
enabled: false
|
||||
|
||||
customLabels:
|
||||
environment: development
|
||||
@@ -0,0 +1,50 @@
|
||||
# Certctl with External PostgreSQL Database
|
||||
# Use this when PostgreSQL is managed externally:
|
||||
# - AWS RDS
|
||||
# - Cloud SQL (Google Cloud)
|
||||
# - Azure Database for PostgreSQL
|
||||
# - Self-managed PostgreSQL server
|
||||
|
||||
server:
|
||||
replicas: 2
|
||||
|
||||
auth:
|
||||
type: api-key
|
||||
apiKey: "CHANGE_ME"
|
||||
|
||||
issuer:
|
||||
local:
|
||||
enabled: true
|
||||
|
||||
# Pass external database URL via environment variable
|
||||
env:
|
||||
CERTCTL_DATABASE_URL: "postgres://certctl:CHANGE_ME@postgres.example.com:5432/certctl?sslmode=require"
|
||||
|
||||
# Disable internal PostgreSQL
|
||||
postgresql:
|
||||
enabled: false
|
||||
|
||||
agent:
|
||||
enabled: true
|
||||
kind: DaemonSet
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
className: nginx
|
||||
hosts:
|
||||
- host: certctl.example.com
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
|
||||
# For AWS RDS (static password shown; IAM authentication requires a generated auth token in place of the password):
|
||||
# env:
|
||||
# CERTCTL_DATABASE_URL: "postgres://certctl:CHANGE_ME@mydb.123456789.us-east-1.rds.amazonaws.com:5432/certctl?sslmode=require"
|
||||
|
||||
# For Google Cloud SQL:
|
||||
# env:
|
||||
# CERTCTL_DATABASE_URL: "postgres://certctl:CHANGE_ME@/certctl?host=/cloudsql/PROJECT:REGION:INSTANCE&sslmode=require"
|
||||
|
||||
# For Azure Database:
|
||||
# env:
|
||||
# CERTCTL_DATABASE_URL: "postgres://certctl@servername:CHANGE_ME@servername.postgres.database.azure.com:5432/certctl?sslmode=require"
|
||||
@@ -0,0 +1,159 @@
|
||||
# Certctl Production HA Configuration
|
||||
# High availability deployment with:
|
||||
# - 3 server replicas with pod anti-affinity
|
||||
# - Large PostgreSQL storage
|
||||
# - Resource limits for production
|
||||
# - Prometheus monitoring
|
||||
# - Network policies enforcement
|
||||
|
||||
namespace: certctl
|
||||
|
||||
server:
|
||||
replicas: 3
|
||||
|
||||
image:
|
||||
repository: ghcr.io/shankar0123/certctl
|
||||
tag: "2.1.0"
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
port: 8443
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 256Mi
|
||||
limits:
|
||||
cpu: 1000m
|
||||
memory: 512Mi
|
||||
|
||||
auth:
|
||||
type: api-key
|
||||
apiKey: "CHANGE_ME_IN_PRODUCTION" # Use --set or sealed-secrets
|
||||
|
||||
logging:
|
||||
level: info
|
||||
format: json
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8443"
|
||||
prometheus.io/path: "/api/v1/metrics/prometheus"
|
||||
|
||||
issuer:
|
||||
local:
|
||||
enabled: true
|
||||
acme:
|
||||
enabled: true
|
||||
directoryURL: https://acme-v02.api.letsencrypt.org/directory
|
||||
email: admin@example.com
|
||||
challengeType: dns-01
|
||||
|
||||
rateLimiting:
|
||||
rps: 500
|
||||
burst: 1000
|
||||
|
||||
postgresql:
|
||||
enabled: true
|
||||
|
||||
image:
|
||||
repository: postgres
|
||||
tag: "16-alpine"
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
auth:
|
||||
database: certctl
|
||||
username: certctl
|
||||
password: "CHANGE_ME_IN_PRODUCTION" # Use --set or sealed-secrets
|
||||
|
||||
storage:
|
||||
size: 100Gi
|
||||
storageClass: "fast-ssd" # Use your high-performance storage class
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
limits:
|
||||
cpu: 2000m
|
||||
memory: 2Gi
|
||||
|
||||
agent:
|
||||
enabled: true
|
||||
kind: DaemonSet
|
||||
|
||||
image:
|
||||
repository: ghcr.io/shankar0123/certctl-agent
|
||||
tag: "2.1.0"
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 256Mi
|
||||
|
||||
discoveryDirs: "/etc/ssl/certs,/etc/pki/tls,/etc/ssl"
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
className: nginx
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
hosts:
|
||||
- host: certctl.example.com
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- secretName: certctl-tls
|
||||
hosts:
|
||||
- certctl.example.com
|
||||
|
||||
serviceAccount:
|
||||
create: true
|
||||
annotations:
|
||||
eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT:role/certctl-role # For IRSA on AWS
|
||||
|
||||
rbac:
|
||||
create: true
|
||||
|
||||
podDisruptionBudget:
|
||||
enabled: true
|
||||
minAvailable: 2
|
||||
|
||||
monitoring:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
interval: 30s
|
||||
scrapeTimeout: 10s
|
||||
|
||||
# Pod anti-affinity for HA
|
||||
podAntiAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
- labelSelector:
|
||||
matchExpressions:
|
||||
- key: app.kubernetes.io/name
|
||||
operator: In
|
||||
values:
|
||||
- certctl
|
||||
- key: app.kubernetes.io/component
|
||||
operator: In
|
||||
values:
|
||||
- server
|
||||
topologyKey: kubernetes.io/hostname
|
||||
|
||||
customLabels:
|
||||
environment: production
|
||||
team: platform
|
||||
cost-center: ops
|
||||
|
||||
customAnnotations:
|
||||
slack-alerts: "#ops"
|
||||
backup-policy: daily
|
||||
@@ -1,5 +1,41 @@
|
||||
# Architecture Guide
|
||||
|
||||
## Contents
|
||||
|
||||
1. [Overview](#overview)
|
||||
2. [System Components](#system-components)
|
||||
- [Control Plane (Server)](#control-plane-server)
|
||||
- [Agents](#agents)
|
||||
- [Web Dashboard](#web-dashboard)
|
||||
- [PostgreSQL Database](#postgresql-database)
|
||||
3. [Data Flow: Certificate Lifecycle](#data-flow-certificate-lifecycle)
|
||||
- [Create Managed Certificate](#1-create-managed-certificate)
|
||||
- [Certificate Issuance](#2-certificate-issuance)
|
||||
- [Deploy Certificate to Target](#3-deploy-certificate-to-target)
|
||||
- [Revoke a Certificate](#35-revoke-a-certificate)
|
||||
- [Automatic Renewal](#4-automatic-renewal)
|
||||
4. [Connector Architecture](#connector-architecture)
|
||||
- [IssuerConnectorAdapter (Dependency Inversion)](#issuerconnectoradapter-dependency-inversion)
|
||||
- [Issuer Connector](#issuer-connector)
|
||||
- [Target Connector](#target-connector)
|
||||
- [Notifier Connector](#notifier-connector)
|
||||
- [EST Server (RFC 7030)](#est-server-rfc-7030)
|
||||
5. [Security Model](#security-model)
|
||||
- [Private Key Management](#private-key-management)
|
||||
- [Authentication](#authentication)
|
||||
- [Audit Trail](#audit-trail)
|
||||
- [API Audit Log](#api-audit-log)
|
||||
- [Logging](#logging)
|
||||
6. [API Design](#api-design)
|
||||
7. [MCP Server](#mcp-server)
|
||||
8. [CLI Tool](#cli-tool)
|
||||
9. [Deployment Topologies](#deployment-topologies)
|
||||
- [Docker Compose (Development / Small Deployments)](#docker-compose-development--small-deployments)
|
||||
- [Production (Kubernetes)](#production-kubernetes)
|
||||
10. [Discovery Data Flow (M18b + M21)](#discovery-data-flow-m18b--m21)
|
||||
11. [Testing Strategy](#testing-strategy)
|
||||
12. [What's Next](#whats-next)
|
||||
|
||||
## Overview
|
||||
|
||||
Certctl is a certificate management platform with a **decoupled control-plane and agent architecture**. The control plane orchestrates certificate issuance and renewal, while agents deployed across your infrastructure handle key generation, certificate deployment, and local validation — private keys never leave the infrastructure they were generated on.
|
||||
@@ -9,11 +45,13 @@ New to certificates? Read the [Concepts Guide](concepts.md) first.
|
||||
### Design Principles
|
||||
|
||||
1. **Private Key Isolation** — Agents generate ECDSA P-256 keys locally and submit CSRs only. Private keys never touch the control plane. Server-side keygen available via `CERTCTL_KEYGEN_MODE=server` for demo only.
|
||||
2. **GUI as Primary Interface** — The web dashboard is the operational control plane, not a secondary viewer. Every backend feature ships with its corresponding GUI surface.
|
||||
3. **Decoupled Operations** — Agents operate autonomously; the control plane coordinates but doesn't block agent function
|
||||
4. **Audit-First** — Complete traceability of all issuance, deployment, and rotation events
|
||||
5. **Connector Architecture** — Pluggable issuers, targets, and notifiers for extensibility
|
||||
6. **Self-Hosted** — No cloud lock-in; run with Docker Compose, Kubernetes, or bare metal
|
||||
2. **Pull-Only Deployment** — The server never initiates outbound connections to agents or targets. Agents poll for work. For network appliances and agentless targets, a proxy agent in the same network zone executes deployments via the target's API. This keeps the control plane firewalled off and limits credential scope to the proxy agent's zone.
|
||||
3. **Sub-CA Capable** — The Local CA can operate as a subordinate CA under an enterprise root (e.g., ADCS). Load a pre-signed CA cert+key from disk and all issued certs chain to the enterprise trust hierarchy. Self-signed mode remains the default for development/demos.
|
||||
4. **GUI as Primary Interface** — The web dashboard is the operational control plane, not a secondary viewer. Every backend feature ships with its corresponding GUI surface.
|
||||
5. **Decoupled Operations** — Agents operate autonomously; the control plane coordinates but doesn't block agent function
|
||||
6. **Audit-First** — Complete traceability of all issuance, deployment, and rotation events
|
||||
7. **Connector Architecture** — Pluggable issuers, targets, and notifiers for extensibility
|
||||
8. **Self-Hosted** — No cloud lock-in; run with Docker Compose, Kubernetes, or bare metal
|
||||
|
||||
## System Components
|
||||
|
||||
@@ -23,12 +61,12 @@ flowchart TB
|
||||
API["REST API\n(Go net/http, :8443)"]
|
||||
SVC["Service Layer"]
|
||||
REPO["Repository Layer\n(database/sql + lib/pq)"]
|
||||
SCHED["Background Scheduler\n4 loops"]
|
||||
SCHED["Background Scheduler\n7 loops"]
|
||||
DASH["Web Dashboard\n(React SPA)"]
|
||||
end
|
||||
|
||||
subgraph "Data Store"
|
||||
PG[("PostgreSQL 16\n14 tables\nTEXT primary keys")]
|
||||
PG[("PostgreSQL 16\n21 tables\nTEXT primary keys")]
|
||||
end
|
||||
|
||||
subgraph "Agent Fleet"
|
||||
@@ -38,18 +76,19 @@ flowchart TB
|
||||
end
|
||||
|
||||
subgraph "Issuer Backends"
|
||||
CA1["Local CA\n(crypto/x509)"]
|
||||
CA2["ACME\n(Let's Encrypt)"]
|
||||
CA3["step-ca\n(planned)"]
|
||||
CA4["OpenSSL / Custom CA\n(planned)"]
|
||||
CA5["ADCS\n(planned)"]
|
||||
CA1["Local CA\n(crypto/x509, sub-CA)"]
|
||||
CA2["ACME\n(HTTP-01 + DNS-01 + DNS-PERSIST-01)\n(EAB, ZeroSSL auto-EAB)"]
|
||||
CA3["step-ca\n(/sign API)"]
|
||||
CA4["OpenSSL / Custom CA\n(script-based)"]
|
||||
CA6["Vault PKI\n(planned)"]
|
||||
end
|
||||
|
||||
subgraph "Target Systems"
|
||||
T1["NGINX\n(file write + reload)"]
|
||||
T2["F5 BIG-IP\n(iControl REST, planned)"]
|
||||
T3["IIS\n(WinRM, planned)"]
|
||||
T4["Apache httpd\n(file write + reload)"]
|
||||
T5["HAProxy\n(combined PEM + reload)"]
|
||||
T2["F5 BIG-IP\n(proxy agent + iControl REST, planned)"]
|
||||
T3["IIS\n(agent-local PowerShell, planned)"]
|
||||
end
|
||||
|
||||
DASH --> API
|
||||
@@ -73,29 +112,31 @@ The control plane is a Go HTTP server backed by PostgreSQL. It manages state (ce
|
||||
|
||||
The server exposes a REST API under `/api/v1/` and optionally serves the web dashboard as static files from the `web/` directory.
|
||||
|
||||
**Key internals**: The server uses Go 1.22's `net/http` stdlib routing (no external router framework), structured logging via `slog`, and a handler → service → repository layered architecture. Handlers define their own service interfaces for clean dependency inversion.
|
||||
**Key internals**: The server uses Go 1.25's `net/http` stdlib routing (no external router framework), structured logging via `slog`, and a handler → service → repository layered architecture. Handlers define their own service interfaces for clean dependency inversion.
|
||||
|
||||
### Agents
|
||||
|
||||
Lightweight Go processes that run on or near your infrastructure. Agents generate ECDSA P-256 private keys locally, create CSRs, and submit them to the control plane for signing — private keys never leave agent infrastructure. Agents also handle certificate deployment to target systems (NGINX fully implemented; Apache httpd, HAProxy planned for V2; F5 BIG-IP, IIS interface only with V2 implementations planned) and report job status. They communicate with the control plane via HTTP and authenticate with API keys.
|
||||
Lightweight Go processes that run on or near your infrastructure. Agents generate ECDSA P-256 private keys locally, create CSRs, and submit them to the control plane for signing — private keys never leave agent infrastructure. Agents also handle certificate deployment to target systems (NGINX, Apache httpd, and HAProxy are fully implemented; F5 BIG-IP and IIS are interface-only, with implementations planned for V2) and report job status. They communicate with the control plane via HTTP and authenticate with API keys.
|
||||
|
||||
The agent runs two background loops: a heartbeat (every 60 seconds) to signal it's alive, and a work poll (every 30 seconds) to check for actionable jobs via `GET /api/v1/agents/{id}/work`. Jobs may be `AwaitingCSR` (agent needs to generate key + submit CSR) or `Deployment` (agent needs to deploy a certificate). Private keys are stored in `CERTCTL_KEY_DIR` (default `/var/lib/certctl/keys`) with 0600 permissions.
|
||||
|
||||
**Planned (V2):** Agent metadata collection — agents will report OS, platform, architecture, IP address, and hostname via heartbeat using `runtime.GOOS`, `runtime.GOARCH`, and `net` stdlib. This metadata enables dynamic device grouping, allowing policies to be scoped by agent criteria (e.g., all Ubuntu agents, all agents in a specific subnet) rather than requiring manual per-certificate assignment.
|
||||
**Agent metadata (M10):** Agents report OS, architecture, IP address, hostname, and version via heartbeat using `runtime.GOOS`, `runtime.GOARCH`, and `net` stdlib. This metadata is stored on the `agents` table and displayed in the GUI (agent list shows OS/Arch column, detail page shows full system info).
|
||||
|
||||
**Agent groups (M11b):** Dynamic device grouping allows organizing agents by metadata criteria. Agent groups can match by OS, architecture, IP CIDR, and version. Groups support both dynamic matching (agents automatically join when criteria match) and manual membership (explicit include/exclude). Renewal policies can be scoped to agent groups via the `agent_group_id` foreign key. The GUI provides full CRUD management for agent groups with visual match criteria badges.
|
||||
|
||||
### Web Dashboard
|
||||
|
||||
The web dashboard is the primary operational interface for certctl. It is built with Vite + React + TypeScript and uses TanStack Query for server state management (caching, background refetching, optimistic updates).
|
||||
|
||||
**Current views**: certificate inventory (list with "New Certificate" creation modal + detail with version history, deploy, archive, and trigger renewal actions), agent fleet (health indicators from heartbeat), job queue (status, retry, cancel), notification inbox (threshold alert grouping, mark-as-read), audit trail (time range and actor/action filters), policy management (rules with enable/disable toggle + delete + violations), issuers (list with test connection + delete), targets (list with delete), and a summary dashboard.
|
||||
**Current views** (21 pages): certificate inventory (list with multi-select bulk operations + "New Certificate" creation modal + detail with deployment status timeline, inline policy/profile editor, version history, deploy, revoke, archive, and trigger renewal actions), agent fleet (list + detail with system info + OS/architecture grouping with charts), job queue (status, retry, cancel, approve/reject for AwaitingApproval jobs), notification inbox (threshold alert grouping, mark-as-read), audit trail (time range, actor, action filters + CSV/JSON export), policy management (rules with enable/disable toggle + delete + violations), issuers (list with test connection + delete), targets (list with 3-step configuration wizard + delete), owners (list with team resolution + delete), teams (list with delete), agent groups (list with dynamic match criteria badges + enable/disable + delete), certificate profiles (list with crypto constraints), short-lived credentials dashboard (TTL countdown, profile filtering, auto-refresh), discovered certificates triage (claim/dismiss unmanaged certs discovered by agents or network scans), network scan targets management (CRUD for network scan targets + Scan Now button), summary dashboard with charts (expiration heatmap, renewal success rate, status distribution, issuance rate), and login page.
|
||||
|
||||
The dashboard includes an **ErrorBoundary component** for graceful error recovery — if a view crashes, the boundary catches the error and displays a user-friendly message instead of breaking the entire dashboard. It also includes a **demo mode** that activates when the API is unreachable — it renders realistic mock data for screenshots and offline presentations.
|
||||
|
||||
**Tech decisions**:
|
||||
- Vite for fast builds and HMR during development
|
||||
- TanStack Query over manual fetch/useEffect for automatic cache invalidation and refetching
|
||||
- Dark theme default (ops teams live in dark mode)
|
||||
- SSE/WebSocket planned for real-time job status updates (V2.0)
|
||||
- Light content area with branded dark teal sidebar, Inter + JetBrains Mono typography
|
||||
- SSE/WebSocket planned for real-time job status updates
|
||||
|
||||
### PostgreSQL Database
|
||||
|
||||
@@ -117,6 +158,11 @@ erDiagram
|
||||
managed_certificates ||--o{ policy_violations : "violates"
|
||||
managed_certificates ||--o{ audit_events : "logged in"
|
||||
managed_certificates ||--o{ notification_events : "generates"
|
||||
managed_certificates ||--o{ certificate_revocations : "revoked via"
|
||||
agent_groups ||--o{ agent_group_members : "has members"
|
||||
agents ||--o{ agent_group_members : "belongs to"
|
||||
agents ||--o{ discovered_certificates : "discovers"
|
||||
agents ||--o{ discovery_scans : "performs"
|
||||
|
||||
teams {
|
||||
text id PK
|
||||
@@ -157,6 +203,10 @@ erDiagram
|
||||
text hostname
|
||||
text status
|
||||
text api_key_hash
|
||||
varchar os
|
||||
varchar architecture
|
||||
varchar ip_address
|
||||
varchar version
|
||||
}
|
||||
deployment_targets {
|
||||
text id PK
|
||||
@@ -211,6 +261,63 @@ erDiagram
|
||||
text recipient
|
||||
text status
|
||||
}
|
||||
certificate_profiles {
|
||||
text id PK
|
||||
text name
|
||||
text description
|
||||
jsonb allowed_key_types
|
||||
int max_validity_days
|
||||
}
|
||||
agent_groups {
|
||||
text id PK
|
||||
text name
|
||||
text description
|
||||
jsonb match_criteria
|
||||
boolean enabled
|
||||
}
|
||||
agent_group_members {
|
||||
text id PK
|
||||
text agent_group_id FK
|
||||
text agent_id FK
|
||||
text membership_type
|
||||
}
|
||||
renewal_policies {
|
||||
text id PK
|
||||
text certificate_id FK
|
||||
int renewal_days_before
|
||||
jsonb alert_thresholds_days
|
||||
boolean auto_renew
|
||||
text agent_group_id FK
|
||||
}
|
||||
certificate_revocations {
|
||||
text id PK
|
||||
text certificate_id FK
|
||||
text serial_number
|
||||
text reason
|
||||
timestamp revoked_at
|
||||
boolean issuer_notified
|
||||
}
|
||||
discovered_certificates {
|
||||
text id PK
|
||||
text agent_id FK
|
||||
text fingerprint_sha256
|
||||
text common_name
|
||||
text source_path
|
||||
text status
|
||||
}
|
||||
discovery_scans {
|
||||
text id PK
|
||||
text agent_id FK
|
||||
int certs_found
|
||||
timestamp scanned_at
|
||||
}
|
||||
network_scan_targets {
|
||||
text id PK
|
||||
text name
|
||||
text[] cidrs
|
||||
int[] ports
|
||||
boolean enabled
|
||||
}
|
||||
```
|
||||
|
||||
Migrations are idempotent (`IF NOT EXISTS` on all CREATE statements, `ON CONFLICT (id) DO NOTHING` on all seed data) so they're safe to run multiple times — important for Docker Compose where both initdb and the server may run the same SQL.
|
||||
@@ -274,6 +381,8 @@ sequenceDiagram
|
||||
Note over A: Agent deploys using locally-held private key
|
||||
```
|
||||
|
||||
**Profile enforcement:** If the certificate is assigned to a profile (`certificate_profile_id`), the profile's `allowed_key_algorithms` and `max_validity_days` constraints are checked during CSR validation. A CSR with a disallowed key type or a validity period exceeding the profile maximum is rejected before reaching the issuer connector.
|
||||
|
||||
#### Server-Side Key Generation (Demo Only)
|
||||
|
||||
Set `CERTCTL_KEYGEN_MODE=server` for development/demo with Local CA. The control plane generates RSA-2048 keys server-side. A log warning is emitted at startup.
|
||||
@@ -301,15 +410,47 @@ sequenceDiagram
|
||||
|
||||
The agent deploys certificates using target connectors. Each connector knows how to push certificates to a specific system:
|
||||
|
||||
- **NGINX**: Writes cert/chain files to disk, validates config with `nginx -t`, reloads with `nginx -s reload` or `systemctl reload nginx`
|
||||
- **F5 BIG-IP**: Calls the F5 REST API to upload certificate and update virtual server bindings
|
||||
- **IIS**: Uses WinRM to import the certificate into the Windows certificate store and bind it to an IIS site
|
||||
- **NGINX**: Writes cert/chain/key files to disk, validates config with `nginx -t`, reloads with `nginx -s reload` or `systemctl reload nginx`
|
||||
- **Apache httpd**: Writes separate cert/chain/key files, validates with `apachectl configtest`, graceful reload
|
||||
- **HAProxy**: Builds a combined PEM file (cert + chain + key), optionally validates config, reloads via systemctl or signal
|
||||
- **F5 BIG-IP** (planned): A proxy agent in the same network zone calls the iControl REST API to upload certificate and update SSL profile bindings. The server assigns the work; the proxy agent executes it.
|
||||
- **IIS** (planned, dual-mode): (1) Agent-local (recommended) — a Windows agent on the IIS box runs PowerShell `Import-PfxCertificate` + `Set-WebBinding` directly. (2) Proxy agent WinRM — for agentless IIS targets, a nearby Windows agent reaches the IIS box via WinRM.
|
||||
|
||||
The agent handles both the certificate (public) and the private key (read from local key store at `CERTCTL_KEY_DIR`). The control plane never sees the private key.
|
||||
The agent handles both the certificate (public) and the private key (read from local key store at `CERTCTL_KEY_DIR`). The control plane never sees the private key and never initiates outbound connections to agents or targets (pull-only model).
|
||||
|
||||
### 3.5 Revoke a Certificate
|
||||
|
||||
When a certificate needs immediate revocation (key compromise, decommission, etc.), the control plane executes a 7-step process:
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as User / API Client
|
||||
participant API as REST API
|
||||
participant SVC as CertificateService
|
||||
participant DB as PostgreSQL
|
||||
participant ISS as Issuer Connector
|
||||
participant NOT as Notification Service
|
||||
|
||||
U->>API: POST /api/v1/certificates/{id}/revoke<br/>{reason: "keyCompromise"}
|
||||
API->>SVC: RevokeCertificateWithActor(id, reason, actor)
|
||||
SVC->>DB: Validate cert is not already revoked/archived
|
||||
SVC->>DB: Get latest certificate version (serial number)
|
||||
SVC->>DB: UPDATE managed_certificates SET status='Revoked'
|
||||
SVC->>DB: INSERT INTO certificate_revocations<br/>(ON CONFLICT DO NOTHING for idempotency)
|
||||
SVC->>ISS: RevokeCertificate(serial, reason)<br/>(best-effort — failure doesn't block)
|
||||
SVC->>DB: INSERT audit_event (certificate_revoked)
|
||||
SVC->>NOT: SendRevocationNotification(cert, reason)
|
||||
SVC-->>API: Updated certificate with Revoked status
|
||||
API-->>U: 200 OK
|
||||
```
|
||||
|
||||
The revocation is recorded in the `certificate_revocations` table (separate from the certificate status update) for CRL generation. The DER-encoded CRL at `GET /api/v1/crl/{issuer_id}` is generated on-demand by querying this table and signing with the issuing CA's key. The OCSP responder at `GET /api/v1/ocsp/{issuer_id}/{serial}` checks both the certificate status and the revocations table to return signed good/revoked/unknown responses.
|
||||
|
||||
Short-lived certificates (those with profile TTL < 1 hour) return "good" from OCSP and are excluded from CRL — their rapid expiry is treated as sufficient revocation.
|
||||
|
||||
### 4. Automatic Renewal
|
||||
|
||||
The control plane runs a scheduler with four background loops:
|
||||
The control plane runs a scheduler with seven background loops:
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
@@ -318,12 +459,18 @@ flowchart LR
|
||||
J["Job Processor\n⏱ every 30s"]
|
||||
H["Agent Health\n⏱ every 2m"]
|
||||
N["Notification Processor\n⏱ every 1m"]
|
||||
SL["Short-Lived Expiry\n⏱ every 30s"]
|
||||
NS["Network Scanner\n⏱ every 6h"]
|
||||
DG["Certificate Digest\n⏱ every 24h"]
|
||||
end
|
||||
|
||||
R -->|"Find expiring certs\nCreate renewal jobs"| DB[("PostgreSQL")]
|
||||
J -->|"Process pending jobs\nCoordinate issuance"| DB
|
||||
H -->|"Check heartbeat staleness\nMark agents offline"| DB
|
||||
N -->|"Send pending notifications\nEmail / Webhook"| DB
|
||||
N -->|"Send pending notifications\nEmail / Webhook / Slack"| DB
|
||||
SL -->|"Expire short-lived certs\nMark as Expired"| DB
|
||||
NS -->|"Probe TLS endpoints\nStore discovered certs"| DB
|
||||
DG -->|"Generate & send HTML digest\nEmail to recipients"| DB
|
||||
```
|
||||
|
||||
| Loop | Interval | Timeout | Purpose |
|
||||
@@ -332,6 +479,11 @@ flowchart LR
|
||||
| Job processor | 30 seconds | 2 minutes | Processes pending jobs (issuance, renewal, deployment) |
|
||||
| Agent health check | 2 minutes | 1 minute | Marks agents as offline if heartbeat is stale |
|
||||
| Notification processor | 1 minute | 1 minute | Sends pending notifications via configured channels |
|
||||
| Short-lived expiry | 30 seconds | 30 seconds | Marks expired short-lived certificates (profile TTL < 1 hour) |
|
||||
| Network scanner | 6 hours | 30 minutes | Probes TLS endpoints on configured CIDR ranges, stores discovered certs (M21, opt-in via `CERTCTL_NETWORK_SCAN_ENABLED`). CIDR size validated at API level — max /20 (4096 IPs) per range. |
|
||||
| Certificate digest | 24 hours | 5 minutes | Generates HTML email with certificate stats, expiration timeline, job health, agent count. Does NOT run on startup — waits for first scheduled tick. Configurable interval and recipients via `CERTCTL_DIGEST_INTERVAL` and `CERTCTL_DIGEST_RECIPIENTS`. Falls back to certificate owner emails if no explicit recipients configured. |
|
||||
|
||||
Each loop uses `sync/atomic.Bool` idempotency guards to prevent concurrent tick execution — if a loop iteration is still running when the next tick fires, the tick is skipped with a warning log. Every loop except the certificate digest (including the short-lived expiry check) runs immediately on startup before entering its ticker interval, ensuring no gap between scheduler start and first execution; the certificate digest loop does NOT run on startup, only on scheduled ticks. Graceful shutdown uses `sync.WaitGroup` with `WaitForCompletion()` to drain all in-flight work before process exit.
|
||||
|
||||
Each operation has a context timeout to prevent indefinite hangs if external services become unresponsive.
|
||||
|
||||
@@ -352,9 +504,8 @@ flowchart TB
|
||||
II["IssuerConnector Interface\nIssueCertificate() | RenewCertificate()\nRevokeCertificate() | GetOrderStatus()"]
|
||||
II --> LC["Local CA"]
|
||||
II --> ACME["ACME v2"]
|
||||
II --> SC["step-ca (planned)"]
|
||||
II --> OC["OpenSSL / Custom CA (planned)"]
|
||||
II --> AD["ADCS (planned)"]
|
||||
II --> SC["step-ca"]
|
||||
II --> OC["OpenSSL / Custom CA"]
|
||||
II --> VP["Vault PKI (planned)"]
|
||||
end
|
||||
|
||||
@@ -362,6 +513,10 @@ flowchart TB
|
||||
direction TB
|
||||
TI["TargetConnector Interface\nDeployCertificate()\nValidateDeployment()"]
|
||||
TI --> NG["NGINX"]
|
||||
TI --> AP["Apache httpd"]
|
||||
TI --> HP["HAProxy"]
|
||||
TI --> TF["Traefik"]
|
||||
TI --> CD["Caddy"]
|
||||
TI --> F5["F5 BIG-IP (interface only)"]
|
||||
TI --> IIS["IIS (interface only)"]
|
||||
end
|
||||
@@ -371,7 +526,10 @@ flowchart TB
|
||||
NI["NotifierConnector Interface\nSendAlert() | SendEvent()"]
|
||||
NI --> EM["Email (SMTP)"]
|
||||
NI --> WH["Webhook (HTTP)"]
|
||||
NI --> SL["Slack (future)"]
|
||||
NI --> SL["Slack"]
|
||||
NI --> TM["Microsoft Teams"]
|
||||
NI --> PD["PagerDuty"]
|
||||
NI --> OG["OpsGenie"]
|
||||
end
|
||||
```
|
||||
|
||||
@@ -406,10 +564,17 @@ type Connector interface {
|
||||
RenewCertificate(ctx context.Context, request RenewalRequest) (*IssuanceResult, error)
|
||||
RevokeCertificate(ctx context.Context, request RevocationRequest) error
|
||||
GetOrderStatus(ctx context.Context, orderID string) (*OrderStatus, error)
|
||||
GenerateCRL(ctx context.Context, revokedCerts []RevokedCertEntry) ([]byte, error)
|
||||
SignOCSPResponse(ctx context.Context, req OCSPSignRequest) ([]byte, error)
|
||||
GetCACertPEM(ctx context.Context) (string, error)
|
||||
}
|
||||
```
|
||||
|
||||
Built-in issuers: **Local CA** (self-signed, in-memory CA for development/demos using `crypto/x509`) and **ACME v2** (fully implemented with HTTP-01 challenge solving, compatible with Let's Encrypt, Sectigo, and any ACME-compliant CA). The ACME connector uses `golang.org/x/crypto/acme`, generates an ECDSA P-256 account key, handles account registration with ToS acceptance, order creation, HTTP-01 challenge solving via a built-in temporary HTTP server, order finalization, and DER-to-PEM chain conversion. Configure via `CERTCTL_ACME_DIRECTORY_URL` and `CERTCTL_ACME_EMAIL`.
|
||||
Built-in issuers: **Local CA** (self-signed or sub-CA mode using `crypto/x509`), **ACME v2** (HTTP-01, DNS-01, and DNS-PERSIST-01 challenges, compatible with Let's Encrypt, ZeroSSL, Sectigo, Google Trust Services, and any ACME-compliant CA), **step-ca** (Smallstep private CA via native /sign API with JWK provisioner auth), and **OpenSSL/Custom CA** (script-based signing delegating to user-provided shell scripts). The ACME connector uses `golang.org/x/crypto/acme`, generates an ECDSA P-256 account key, handles account registration with ToS acceptance and optional External Account Binding (EAB) for CAs that require it (ZeroSSL, Google Trust Services, SSL.com), order creation, challenge solving (HTTP-01 via built-in server, DNS-01 via script-based hooks, DNS-PERSIST-01 via standing TXT records with auto-fallback to DNS-01), order finalization, and DER-to-PEM chain conversion. For ZeroSSL, EAB credentials are auto-fetched from ZeroSSL's public API when the directory URL is detected as ZeroSSL and no EAB credentials are provided — zero-friction onboarding with no dashboard visit required.
|
||||
|
||||
**ACME Renewal Information (ARI, RFC 9773):** The ACME connector supports CA-directed renewal timing via the `GetRenewalInfo()` method. Instead of using fixed thresholds (e.g., renew 30 days before expiry), the CA tells certctl when to renew by providing a `suggestedWindow` with start and end times. This is useful for distributing renewal load during maintenance windows and coordinating mass-revocation scenarios. Enable with `CERTCTL_ACME_ARI_ENABLED=true`. Cert ID is computed as `base64url(SHA-256(DER cert))` per RFC 9773. If the CA doesn't support ARI (404 from the ARI endpoint), certctl automatically falls back to threshold-based renewal — no operator intervention required. Errors from the CA are logged as warnings.
|
||||
|
||||
The interface also includes `GetCACertPEM(ctx)` for CA chain distribution (used by the EST server's `/cacerts` endpoint).
|
||||
|
||||
### Target Connector
|
||||
|
||||
@@ -425,9 +590,11 @@ type Connector interface {
|
||||
|
||||
The `DeploymentRequest` struct carries the full material needed by the target system: the signed certificate, the CA chain, the agent-generated private key, target-specific configuration, and arbitrary metadata. The key field is populated by the agent from its local key store (`CERTCTL_KEY_DIR`) — it never originates from the control plane.
|
||||
|
||||
Built-in targets: **NGINX** (writes cert/chain/key files, validates with `nginx -t`, reloads), **F5 BIG-IP** (interface only — iControl REST flow mapped, implementation planned), **IIS** (interface only — WinRM/PowerShell flow mapped, implementation planned).
|
||||
Built-in targets: **NGINX** (writes cert/chain/key files, validates with `nginx -t`, reloads), **Apache httpd** (writes cert/chain/key files, validates with `apachectl configtest`, graceful reload), **HAProxy** (combined PEM file with cert+chain+key, validates config, reloads via systemctl/signal), **Traefik** (file provider — writes cert/key to watched directory, Traefik auto-reloads), **Caddy** (dual-mode: admin API hot-reload or file-based), **F5 BIG-IP** (interface only — proxy agent + iControl REST, implementation planned), **IIS** (interface only — dual-mode: agent-local PowerShell primary + proxy agent WinRM for agentless targets, implementation planned).
|
||||
|
||||
**Planned targets (V2):** Apache httpd (file write, `apachectl configtest`, graceful reload), HAProxy (combined PEM file write, reload via socket/signal). **Planned targets (V3):** AWS ALB/CloudFront, Azure Key Vault, Palo Alto, FortiGate, Citrix ADC, Kubernetes Secrets.
|
||||
After deployment, agents can perform **post-deployment TLS verification**: the agent probes the live TLS endpoint using `crypto/tls.DialWithDialer` and compares the SHA-256 fingerprint of the served certificate against what was deployed. Results are reported via `POST /api/v1/jobs/{id}/verify` and stored on the job record. Verification is best-effort — failures don't block or roll back deployments.
|
||||
|
||||
Additional cloud, network, and Kubernetes target connectors are planned for future releases.
|
||||
|
||||
### Notifier Connector
|
||||
|
||||
@@ -441,10 +608,49 @@ type Connector interface {
|
||||
}
|
||||
```
|
||||
|
||||
Built-in notifiers: **Email** (SMTP) and **Webhook** (HTTP POST).
|
||||
Built-in notifiers: **Email** (SMTP), **Webhook** (HTTP POST), **Slack** (incoming webhook), **Microsoft Teams** (MessageCard), **PagerDuty** (Events API v2), and **OpsGenie** (Alert API v2). Each is enabled by setting its configuration environment variable.
|
||||
|
||||
See the [Connector Development Guide](connectors.md) for details on building custom connectors.
|
||||
|
||||
### EST Server (RFC 7030)
|
||||
|
||||
The EST (Enrollment over Secure Transport) server provides an industry-standard enrollment interface for devices that need certificates without using the REST API. It runs under `/.well-known/est/` per RFC 7030 and supports four operations: CA certificate distribution (`/cacerts`), initial enrollment (`/simpleenroll`), re-enrollment (`/simplereenroll`), and CSR attributes (`/csrattrs`).
|
||||
|
||||
**Architecture:** EST is a handler-level protocol that delegates certificate issuance to an existing `IssuerConnector`. This means EST is not a new issuer — it's a new *interface* to the existing issuance infrastructure. The `ESTService` bridges the `ESTHandler` to whichever issuer connector is configured via `CERTCTL_EST_ISSUER_ID`.
|
||||
|
||||
```
|
||||
Client (WiFi AP, MDM, IoT)
|
||||
│
|
||||
▼
|
||||
ESTHandler (handler layer)
|
||||
│ CSR parsing, PKCS#7 response encoding
|
||||
▼
|
||||
ESTService (service layer)
|
||||
│ CSR validation, CN/SAN extraction, audit recording
|
||||
▼
|
||||
IssuerConnector (connector layer via IssuerConnectorAdapter)
|
||||
│ Certificate signing (Local CA, step-ca, etc.)
|
||||
▼
|
||||
Signed certificate returned as PKCS#7 certs-only
|
||||
```
|
||||
|
||||
**Wire format:** EST uses PKCS#7 (RFC 2315) certs-only degenerate SignedData for certificate responses and base64-encoded DER for CSR requests. The handler includes a hand-rolled ASN.1 PKCS#7 builder — no external PKCS#7 dependency. The CSR reader accepts both base64-encoded DER (standard EST wire format) and PEM-encoded PKCS#10 (convenience for debugging).
|
||||
|
||||
**Interface:** The `ESTHandler` defines an `ESTService` interface (dependency inversion, same pattern as all other handlers):
|
||||
|
||||
```go
|
||||
type ESTService interface {
|
||||
GetCACerts(ctx context.Context) (string, error)
|
||||
SimpleEnroll(ctx context.Context, csrPEM string) (*domain.ESTEnrollResult, error)
|
||||
SimpleReEnroll(ctx context.Context, csrPEM string) (*domain.ESTEnrollResult, error)
|
||||
GetCSRAttrs(ctx context.Context) ([]byte, error)
|
||||
}
|
||||
```
|
||||
|
||||
**Issuer connector extension:** EST required adding `GetCACertPEM(ctx) (string, error)` to the issuer connector interface so the `/cacerts` endpoint can serve the CA chain. The Local CA connector returns its CA certificate PEM; ACME, step-ca, and OpenSSL connectors return errors (they don't expose a static CA chain — their chains are per-issuance).
|
||||
|
||||
**Audit:** Every EST enrollment is recorded in the audit trail with `protocol: "EST"`, the CN, SANs, issuer ID, serial number, and optional profile ID.
|
||||
|
||||
## Security Model
|
||||
|
||||
### Private Key Management
|
||||
@@ -489,7 +695,7 @@ The control plane only handles public material: certificates, chains, and CSRs.
|
||||
- **API clients → Server**: API key in `Authorization: Bearer` header, or `none` for demo mode
|
||||
- **Agent → Server**: API key registered at agent creation, included in all requests
|
||||
- **Server → Issuers**: ACME account key, or connector-specific credentials
|
||||
- **Agent → Targets**: SSH keys, API tokens, WinRM credentials (stored locally on agent)
|
||||
- **Agent → Targets**: API tokens, WinRM credentials (stored locally on agent or proxy agent — never on server). Credential scope is limited to the agent's network zone.
|
||||
|
||||
### Audit Trail
|
||||
|
||||
@@ -510,6 +716,43 @@ Every action is recorded as an immutable audit event:
|
||||
|
||||
Audit events cannot be modified or deleted. They support filtering by actor, action, resource type, resource ID, and time range. All audit operations are logged via structured `slog` logging; if an audit event fails to persist, the error is logged immediately to ensure no gaps in the audit trail go unnoticed.
|
||||
|
||||
### API Audit Log
|
||||
|
||||
In addition to application-level audit events, certctl records every HTTP API call via middleware. The audit middleware captures method, URL path (excluding query parameters — see security note below), actor (extracted from auth context), SHA-256 request body hash (truncated to 16 characters), response status code, and request latency. Health and readiness probes are excluded to avoid noise.
|
||||
|
||||
**Security: Query Parameter Exclusion** — The audit middleware intentionally records `r.URL.Path` only (not `r.URL.String()` or `r.RequestURI`). Query strings may contain cursor tokens, API keys passed as params, or other sensitive filter values. Since the audit trail is append-only with no deletion capability, any sensitive data recorded would persist permanently.
|
||||
|
||||
Audit recording is async (via goroutine) so it never blocks the HTTP response. If audit persistence fails, the error is logged immediately — the API call still succeeds. The middleware sits after the auth middleware in the stack so the actor identity is available from context.
|
||||
|
||||
### Input Validation and SSRF Protection
|
||||
|
||||
All shell-facing inputs (connector scripts, domain names, ACME tokens) are validated through `internal/validation/command.go` before reaching shell execution. `ValidateShellCommand()` denies all shell metacharacters. `ValidateDomainName()` enforces RFC 1123. `ValidateACMEToken()` restricts to base64url characters. The network scanner filters reserved IP ranges (loopback, link-local including cloud metadata 169.254.169.254, multicast, broadcast) to prevent SSRF, while preserving RFC 1918 private ranges for legitimate internal scanning.
|
||||
|
||||
### Request Body Size Limits
|
||||
|
||||
All incoming HTTP request bodies are capped by `http.MaxBytesReader` middleware (default 1MB, configurable via `CERTCTL_MAX_BODY_SIZE`). Requests exceeding the limit receive a 413 Request Entity Too Large response. The middleware is positioned before authentication in the chain so oversized payloads are rejected early, before any auth processing or database work occurs. Requests without bodies (GET, HEAD, nil body) skip the limit check.
|
||||
|
||||
### CORS
|
||||
|
||||
CORS uses a **deny-by-default** posture: when `CERTCTL_CORS_ORIGINS` is empty, no CORS headers are set and only same-origin requests can read responses. Operators must explicitly configure allowed origins. This prevents accidental exposure of the API to cross-origin requests in production.
|
||||
|
||||
### Middleware Chain Order
|
||||
|
||||
The HTTP middleware stack processes requests in the following order (see `cmd/server/main.go`):
|
||||
|
||||
1. **RequestID** - assigns unique request ID for correlation
|
||||
2. **Logging** - structured slog middleware with request ID propagation
|
||||
3. **Recovery** - panic recovery (catches panics in downstream middleware/handlers)
|
||||
4. **BodyLimit** - request body size cap via `http.MaxBytesReader`
|
||||
5. **RateLimiter** - token bucket rate limiting (optional, when enabled)
|
||||
6. **CORS** - cross-origin request handling (deny-by-default)
|
||||
7. **Auth** - API key or JWT validation
|
||||
8. **AuditLog** - records every API call to the audit trail (requires auth context for actor)
|
||||
|
||||
### Concurrency Safety
|
||||
|
||||
The background scheduler uses `sync/atomic.Bool` idempotency guards on all 6 loops — if a tick fires while the previous iteration is still running, it skips. A `sync.WaitGroup` tracks all in-flight goroutines. `WaitForCompletion(timeout)` blocks during shutdown until all work finishes or the timeout expires, preventing state corruption from mid-flight database operations during process exit.
|
||||
|
||||
### Logging
|
||||
|
||||
All logging throughout the service layer uses Go's `log/slog` package for structured, queryable logs. This replaces ad-hoc `fmt.Printf` statements with consistent key-value logging that includes request context, operation names, and error details. Agents also implement exponential backoff on network failures to gracefully handle temporary connectivity issues with the control plane.
|
||||
@@ -525,10 +768,60 @@ All endpoints are under `/api/v1/` and follow consistent patterns:
|
||||
- **Delete**: `DELETE /api/v1/{resources}/{id}` — returns `204` (soft delete/archive)
|
||||
- **Actions**: `POST /api/v1/{resources}/{id}/{action}` — returns `202` for async operations
|
||||
|
||||
Resources: certificates, issuers, targets, agents, jobs, policies, teams, owners, audit, notifications.
|
||||
Resources: certificates, issuers, targets, agents, jobs, policies, profiles, teams, owners, agent-groups, audit, notifications, discovered-certificates, discovery-scans, network-scan-targets, stats, metrics.
|
||||
|
||||
The full API is documented in an OpenAPI 3.1 specification at `api/openapi.yaml` with 99 endpoints across 23 resource domains (97 under `/api/v1/` + `/.well-known/est/` plus `/health` and `/ready`; includes auth, 7 discovery endpoints from M18b, 6 network scan endpoints from M21, Prometheus metrics from M22, 4 EST enrollment endpoints from M23, 2 digest endpoints from M29), all request/response schemas, and pagination conventions. See the [OpenAPI Guide](openapi.md) for usage with Swagger UI and SDK generation.
|
||||
|
||||
Jobs support additional action endpoints: `POST /api/v1/jobs/{id}/cancel`, `POST /api/v1/jobs/{id}/approve`, `POST /api/v1/jobs/{id}/reject`.
|
||||
|
||||
**Enhanced Query Features (M20):** Certificate list endpoints support additional query capabilities beyond basic pagination:
|
||||
|
||||
- **Sorting**: `?sort=notAfter` (ascending) or `?sort=-createdAt` (descending). Whitelist: notAfter, expiresAt, createdAt, updatedAt, commonName, name, status, environment.
|
||||
- **Time-range filters**: `?expires_before=`, `?expires_after=`, `?created_after=`, `?updated_after=` (RFC 3339 format).
|
||||
- **Cursor pagination**: `?cursor=<token>&page_size=100` for efficient keyset pagination alongside traditional page-based.
|
||||
- **Sparse fields**: `?fields=id,common_name,status` to reduce response payload.
|
||||
- **Additional filters**: `?agent_id=`, `?profile_id=` (in addition to existing status, environment, owner_id, team_id, issuer_id).
|
||||
- **Deployments**: `GET /api/v1/certificates/{id}/deployments` returns deployment targets for a certificate.
|
||||
|
||||
Certificate revocation: `POST /api/v1/certificates/{id}/revoke` with optional `{"reason": "keyCompromise"}`. Supports RFC 5280 reason codes (unspecified, keyCompromise, caCompromise, affiliationChanged, superseded, cessationOfOperation, certificateHold, privilegeWithdrawn). Returns the updated certificate status. Best-effort issuer notification — the revocation succeeds even if the issuer connector is unavailable. A JSON-formatted CRL is available at `GET /api/v1/crl`, and a DER-encoded X.509 CRL signed by the issuing CA at `GET /api/v1/crl/{issuer_id}`. An embedded OCSP responder serves signed responses at `GET /api/v1/ocsp/{issuer_id}/{serial}`. Short-lived certificates (profile TTL < 1 hour) are exempt from CRL/OCSP — expiry is sufficient revocation.
|
||||
|
||||
Certificate export (M27): `GET /api/v1/certificates/{id}/export/pem` returns PEM-encoded certificate and chain, and `POST /api/v1/certificates/{id}/export/pkcs12` returns a PKCS#12 bundle (binary). Private keys are never exported — they remain on agents. All exports are audited with actor, timestamp, and format.
|
||||
|
||||
Health checks live outside the API prefix: `GET /health` and `GET /ready`.
|
||||
|
||||
## MCP Server
|
||||
|
||||
certctl includes an MCP (Model Context Protocol) server as a separate binary (`cmd/mcp-server/`) that enables AI assistants to interact with the certificate platform. The MCP server uses the official MCP Go SDK (`modelcontextprotocol/go-sdk`) with stdio transport for integration with Claude, Cursor, and other MCP-compatible tools.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
AI["AI Assistant\n(Claude, Cursor)"] -->|"stdio"| MCP["MCP Server\ncmd/mcp-server/"]
|
||||
MCP -->|"HTTP + Bearer token"| API["certctl REST API\n:8443"]
|
||||
|
||||
subgraph "78 MCP Tools"
|
||||
T1["Certificate CRUD"]
|
||||
T2["Agent Management"]
|
||||
T3["Job Operations"]
|
||||
T4["Policy/Profile Queries"]
|
||||
T5["Audit Trail Access"]
|
||||
T6["Stats & Metrics"]
|
||||
end
|
||||
|
||||
MCP --> T1 & T2 & T3 & T4 & T5 & T6
|
||||
```
|
||||
|
||||
The MCP server is a stateless HTTP proxy — every MCP tool call translates to an HTTP request to the certctl REST API. It adds no new state, no new dependencies, and no new attack surface beyond what the API already exposes. Configuration is minimal: `CERTCTL_SERVER_URL` and `CERTCTL_API_KEY` environment variables.
|
||||
|
||||
The 78 tools are organized across 16 resource domains with typed input structs and `jsonschema` struct tags for automatic LLM-friendly schema generation. Binary response support handles DER CRL and OCSP endpoints.
|
||||
|
||||
## CLI Tool
|
||||
|
||||
certctl ships with a command-line tool (`certctl-cli`, built from `cmd/cli/main.go`) that wraps the REST API for terminal workflows. The CLI uses Go's standard library only (`flag` + `text/tabwriter`) — no Cobra or other framework dependencies.
|
||||
|
||||
12 subcommands organized by resource: `certs list`, `certs get`, `certs renew`, `certs revoke`, `agents list`, `agents get`, `jobs list`, `jobs get`, `jobs cancel`, `import` (bulk PEM import), `status` (health + summary stats), and `version`. Output is available in table (default) or JSON format via `--format`. Connection is configured via `CERTCTL_SERVER_URL` and `CERTCTL_API_KEY` environment variables or CLI flags.
|
||||
|
||||
The bulk import command (`certctl-cli import <file.pem>`) parses multi-certificate PEM files and creates certificate records via the API — useful for bootstrapping certctl with existing certificate inventory.
|
||||
|
||||
## Deployment Topologies
|
||||
|
||||
### Docker Compose (Development / Small Deployments)
|
||||
@@ -549,7 +842,9 @@ flowchart TB
|
||||
**Credentials & Configuration:**
|
||||
Database and API credentials are managed via environment variables defined in a `.env` file. Copy `deploy/.env.example` to `deploy/.env` for local development and customize credentials for production. The agent key directory (`CERTCTL_KEY_DIR`) is persisted as a named Docker volume (`agent_keys`) at `/var/lib/certctl/keys` for reliable key storage across container restarts.
|
||||
|
||||
### Production (Kubernetes)
|
||||
### Production (Kubernetes with Helm)
|
||||
|
||||
A production-ready Helm chart is available under `deploy/helm/certctl/` with full support for multi-replica deployments, persistent PostgreSQL, agent DaemonSet, optional Ingress, and security best practices.
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
@@ -575,26 +870,117 @@ flowchart TB
|
||||
DS --> DEP
|
||||
```
|
||||
|
||||
**Helm Installation:**
|
||||
|
||||
```bash
|
||||
# Add the chart (if published) or install from local directory
|
||||
helm install certctl deploy/helm/certctl/ \
|
||||
--set server.auth.apiKey="your-secure-key" \
|
||||
--set postgresql.auth.password="your-db-password" \
|
||||
--set ingress.enabled=true \
|
||||
--set ingress.hosts[0].host="certctl.example.com"
|
||||
```
|
||||
|
||||
The Helm chart includes: server Deployment with configurable replicas, liveness/readiness probes, security context (non-root, read-only rootfs), PostgreSQL StatefulSet with persistent volumes, optional Ingress with TLS, ServiceAccount with configurable RBAC, and agent DaemonSet running one agent per node. All certctl configuration options are exposed in `values.yaml` — issuers, targets, notifiers, scheduler intervals, discovery settings, and SMTP for digest emails.
|
||||
|
||||
See `deploy/helm/certctl/values.yaml` for the full configuration reference and `deploy/helm/certctl/Chart.yaml` for version and appVersion details.
|
||||
|
||||
For production, you would also add an ingress controller, TLS termination for the certctl API itself, and external PostgreSQL (RDS, Cloud SQL, etc.).
|
||||
|
||||
## Discovery Data Flow (M18b + M21)
|
||||
|
||||
Certificate discovery enables operators to build a complete inventory of existing certificates before managing them with certctl. There are two discovery modes that feed into the same pipeline:
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph "Discovery Sources"
|
||||
AGENT["certctl-agent\n(filesystem discovery)"]
|
||||
SCAN["Filesystem Scanner\n(CERTCTL_DISCOVERY_DIRS)"]
|
||||
SERVER["certctl-server\n(network discovery)"]
|
||||
NETSCAN["TLS Scanner\n(CIDR ranges + ports)"]
|
||||
end
|
||||
|
||||
EXTRACT["Extract Metadata\n(CN, SANs, serial, issuer, expiry, fingerprint)"]
|
||||
SERVICE["Discovery Service\n(ProcessDiscoveryReport)"]
|
||||
REPO["Discovery Repository\n(upsert with fingerprint dedup)"]
|
||||
DB["PostgreSQL\ndiscovered_certificates\ndiscovery_scans tables"]
|
||||
AUDIT["Audit Service\n(RecordDiscoveryScanCompleted)"]
|
||||
API_LIST["GET /api/v1/discovered-certificates\n(list for triage)"]
|
||||
API_CLAIM["POST /discovered-certificates/{id}/claim"]
|
||||
API_DISMISS["POST /discovered-certificates/{id}/dismiss"]
|
||||
|
||||
AGENT -->|"Scan loop\n(startup + 6h)"| SCAN
|
||||
SCAN --> EXTRACT
|
||||
SERVER -->|"Scheduler loop\n(every 6h)"| NETSCAN
|
||||
NETSCAN -->|"crypto/tls.Dial\n50 goroutines"| EXTRACT
|
||||
EXTRACT --> SERVICE
|
||||
SERVICE --> REPO
|
||||
REPO -->|"Dedup by fingerprint\n+ agent_id + source_path"| DB
|
||||
SERVICE --> AUDIT
|
||||
AUDIT --> DB
|
||||
DB --> API_LIST
|
||||
API_LIST --> API_CLAIM
|
||||
API_LIST --> API_DISMISS
|
||||
```
|
||||
|
||||
**Filesystem Discovery (M18b):**
|
||||
|
||||
1. **Agent-side discovery** — Agent scans `CERTCTL_DISCOVERY_DIRS` on startup and every 6 hours, walking directories recursively and parsing PEM/DER files
|
||||
2. **Metadata extraction** — For each certificate found, extract: common name, SANs, serial number, issuer DN, subject DN, expiration date, key algorithm, key size, is_ca flag, SHA-256 fingerprint (used as dedup key)
|
||||
3. **Server submission** — Agent POSTs scan results as `DiscoveryReport` to `POST /api/v1/agents/{id}/discoveries`
|
||||
4. **Deduplication** — Server uses fingerprint + agent ID + filesystem path as unique key; prevents duplicate records of the same cert on the same agent
|
||||
|
||||
**Network Discovery (M21):**
|
||||
|
||||
1. **Target configuration** — Operator creates network scan targets via `POST /api/v1/network-scan-targets` with CIDR ranges, ports, and scan interval
|
||||
2. **CIDR expansion** — Ranges expanded to individual IPs with /20 safety cap (4096 IPs max)
|
||||
3. **TLS probing** — Server uses `crypto/tls.DialWithDialer` with `InsecureSkipVerify=true` to connect to each endpoint; 50 concurrent goroutines with configurable timeout
|
||||
4. **Certificate extraction** — Full X.509 metadata extracted from TLS handshake peer certificates
|
||||
5. **Sentinel agent** — Results submitted using `server-scanner` as virtual agent ID, with `source_path` set to `ip:port` and `source_format` set to `network`
|
||||
6. **Same pipeline** — Feeds into the same `DiscoveryService.ProcessDiscoveryReport()` as filesystem discovery — same dedup, same audit trail, same triage workflow
|
||||
|
||||
**Common triage workflow (both sources):**
|
||||
|
||||
1. **Storage** — Records stored in `discovered_certificates` table with status = "Unmanaged"
|
||||
2. **Audit** — `discovery_scan_completed` event logged with agent ID, cert count, scan timestamp
|
||||
3. **Operator triage** — Operator queries `GET /api/v1/discovered-certificates?status=Unmanaged` to see new findings
|
||||
4. **Claim or dismiss** — For each unmanaged cert, operator either:
|
||||
- **Claims it** via `POST /discovered-certificates/{id}/claim` — links to existing managed cert or creates new enrollment
|
||||
- **Dismisses it** via `POST /discovered-certificates/{id}/dismiss` — removes from triage, marked as "Dismissed"
|
||||
5. **Status tracking** — `discovery_cert_claimed` and `discovery_cert_dismissed` events audit the operator's decision
|
||||
6. **Summary** — `GET /api/v1/discovery-summary` returns count of Unmanaged, Managed, and Dismissed certs (useful for compliance reporting)
|
||||
|
||||
This data flow is pull-based and non-blocking. Agents discover at their own pace; the server stores results for later review. There's no pressure to claim or dismiss; operators can leave certificates in "Unmanaged" status indefinitely.
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
certctl uses a layered testing approach aligned with the handler → service → repository architecture, with 220+ tests across five layers (service, handler, integration, connector, and frontend). The goal is high-confidence regression prevention at the service and handler layers, where the most complex business logic lives, combined with integration tests that exercise the full request path from HTTP to database.
|
||||
certctl uses a layered testing approach aligned with the handler → service → repository architecture, with 1050+ tests across six layers (service, handler, integration, connector, frontend, and scheduler). The goal is high-confidence regression prevention at the service and handler layers, where the most complex business logic lives, combined with integration tests that exercise the full request path from HTTP to database.
|
||||
|
||||
**Service layer unit tests** (`internal/service/*_test.go`) — 74 test functions across 7 files with mock repositories. These test all business logic in isolation: certificate CRUD with validation, agent lifecycle (registration, heartbeat, CSR submission with both keygen modes), job state machine (creation, processing, cancellation, retry logic), policy evaluation (all 5 rule types, violation creation), renewal and issuance flow (server-side and agent-side keygen paths), and notification deduplication (threshold tag matching, channel routing). Mock repositories are simple structs with function fields, avoiding heavy mocking frameworks — this keeps tests readable and avoids coupling to mock library APIs.
|
||||
**Service layer unit tests** (`internal/service/*_test.go`) — ~238 test functions across 15 files with mock repositories. These test all business logic in isolation: certificate CRUD with validation, certificate revocation (success, already-revoked, archived, invalid reason, all RFC 5280 reason codes, issuer notification, notification service integration, OCSP/CRL generation), agent lifecycle (registration, heartbeat, CSR submission with both keygen modes), job state machine (creation, processing, cancellation, retry logic), policy evaluation (all 5 rule types, violation creation), renewal and issuance flow (server-side and agent-side keygen paths), notification deduplication (threshold tag matching, channel routing), team/owner/agent group CRUD with pagination and audit recording, issuer service CRUD with connection testing, and the issuer connector adapter (type translation between connector and service layers including revocation). Mock repositories are simple structs with function fields, avoiding heavy mocking frameworks — this keeps tests readable and avoids coupling to mock library APIs.
|
||||
|
||||
**Handler layer tests** (`internal/api/handler/*_test.go`) — 127 test functions across 7 files using Go's `httptest` package. Every handler file has a corresponding test file: certificates (22 tests), agents (28 tests), jobs (13 tests), notifications (11 tests), policies (19 tests), issuers (17 tests), and targets (17 tests). Each test file follows the same pattern: a mock service struct with function fields, `httptest.NewRecorder` for capturing responses, and a shared `contextWithRequestID()` helper. Tests cover the happy path, input validation (missing fields, invalid JSON, empty IDs), error propagation from the service layer, method-not-allowed responses, and pagination parameters.
|
||||
**Handler layer tests** (`internal/api/handler/*_test.go`) — ~257 test functions across 11 files using Go's `httptest` package. Every handler file has a corresponding test file: certificates (50 tests including revocation, DER CRL, and OCSP), agents (28 tests), jobs (21 tests including approve/reject), notifications (11 tests), policies (19 tests), profiles (18 tests), issuers (17 tests), targets (17 tests), agent groups (12 tests), teams (26 tests), and owners (21 tests). Each test file follows the same pattern: a mock service struct with function fields, `httptest.NewRecorder` for capturing responses, and a shared `contextWithRequestID()` helper. Tests cover the happy path, input validation (missing fields, invalid JSON, empty IDs, name length limits), error propagation from the service layer, method-not-allowed responses, and pagination parameters.
|
||||
|
||||
**Integration tests** (`internal/integration/`) — Two test files exercising the full stack from HTTP request through router, handler, service, and postgres repository layers. `lifecycle_test.go` has 11 subtests covering the complete certificate lifecycle: team/owner creation, certificate creation, issuer verification, renewal trigger, job verification, agent registration, CSR submission, deployment, and status reporting. `negative_test.go` has 12 subtests covering error paths: nonexistent resource lookups (404s), invalid request bodies (malformed JSON, missing required fields), invalid CSR submission, heartbeat for nonexistent agents, wrong HTTP methods on list endpoints, empty list responses, renewal on nonexistent certificates, and expired certificate lifecycle. Both use a shared `setupTestServer()` that builds a fully-wired server with real postgres repositories and the Local CA issuer connector.
|
||||
**Integration tests** (`internal/integration/`) — Two test files exercising the full stack from HTTP request through router, handler, service, and postgres repository layers. `lifecycle_test.go` has 11 subtests covering the complete certificate lifecycle: team/owner creation, certificate creation, issuer verification, renewal trigger, job verification, agent registration, CSR submission, deployment, and status reporting. `negative_test.go` has 14 subtests covering error paths, 19 M11b endpoint tests, and 8 revocation endpoint tests (M15a+M15b): nonexistent resource lookups (404s), invalid request bodies (malformed JSON, missing required fields), invalid CSR submission, heartbeat for nonexistent agents, wrong HTTP methods on list endpoints, empty list responses, renewal on nonexistent certificates, expired certificate lifecycle, team/owner/agent group CRUD validation, revocation success, already-revoked rejection, not-found revocation, JSON CRL retrieval, DER CRL retrieval, OCSP response retrieval, and short-lived cert exemption. Both use a shared `setupTestServer()` that builds a fully-wired server with real postgres repositories and the Local CA issuer connector. A third file, `e2e_test.go`, contains 8 cross-milestone test functions with 48+ subtests that exercise features across milestones end-to-end: M10 agent metadata via heartbeat, M11 profiles/teams/owners/agent-groups CRUD, M12 issuer registry verification, M13 GUI operation endpoints, M14 stats and metrics, M15 revocation and CRL, M16 notification channels, and M20 enhanced query API (sorting, cursor pagination, sparse fields, time-range filters).
|
||||
|
||||
**Frontend tests** (`web/src/api/client.test.ts`, `web/src/api/utils.test.ts`) — 53 Vitest tests covering the API client and utility functions. The API client tests mock `globalThis.fetch` and verify all endpoint functions (certificates, agents, jobs, policies, issuers, targets, notifications, audit, health) send correct HTTP methods, URLs, headers, and request bodies. They also test API key management (store/retrieve/clear), auth header propagation, 401 event dispatching, and error handling (server messages, error fields, status text fallback). The utility tests use `vi.useFakeTimers()` for deterministic date testing and cover `formatDate`, `formatDateTime`, `timeAgo`, `daysUntil`, and `expiryColor`. The test environment uses jsdom with `@testing-library/jest-dom` matchers.
|
||||
**Frontend tests** (`web/src/api/client.test.ts`, `web/src/api/utils.test.ts`) — 86 Vitest tests covering the API client, stats/metrics endpoints, and utility functions. The API client tests mock `globalThis.fetch` and verify all endpoint functions (certificates, agents, jobs, policies, issuers, targets, notifications, audit, stats, metrics, health) send correct HTTP methods, URLs, headers, and request bodies. They also test API key management (store/retrieve/clear), auth header propagation, 401 event dispatching, and error handling (server messages, error fields, status text fallback). The stats/metrics endpoint tests verify correct query parameter handling and response shape validation. The utility tests use `vi.useFakeTimers()` for deterministic date testing and cover `formatDate`, `formatDateTime`, `timeAgo`, `daysUntil`, and `expiryColor`. The test environment uses jsdom with `@testing-library/jest-dom` matchers.
|
||||
|
||||
**CI pipeline** (`.github/workflows/ci.yml`) — Two parallel jobs: Go (build, vet, test with coverage, coverage threshold enforcement) and Frontend (TypeScript type check, Vitest test suite, Vite production build). The Go job runs all tests with `-coverprofile`, then enforces coverage thresholds: service layer must be at least 30% (current: ~34%) and handler layer must be at least 50% (current: ~61%). These thresholds act as regression floors — they can only go up. The service layer threshold is deliberately lower because much of the service code depends on postgres repositories and external connectors that require real infrastructure to test meaningfully. Connector tests are included via `./internal/connector/issuer/local/...` (the Local CA package, which has unit tests for certificate signing logic). The Frontend job runs `npx vitest run` between the TypeScript check and production build steps.
|
||||
**CLI tests** (`internal/cli/client_test.go`) — 14 tests covering all 10 CLI subcommands with httptest mock servers, PEM parsing for bulk import, auth header verification, and JSON/table output formatting.
|
||||
|
||||
**What's not tested and why:** Postgres repository implementations (`internal/repository/postgres/`) require a real database and are tested only through integration tests, not unit tests. Target connectors (NGINX, F5, IIS) depend on real infrastructure or complex mocks. Scheduler loops are time-dependent and tested manually during development. The ACME connector requires a real ACME server (tested manually against Let's Encrypt staging). These are all candidates for future expansion as the test infrastructure matures.
|
||||
**CI pipeline** (`.github/workflows/ci.yml`) — Two parallel jobs: Go (build, vet, race detection, static analysis, vulnerability scanning, test with coverage, coverage threshold enforcement) and Frontend (TypeScript type check, Vitest test suite, Vite production build). The Go job runs `go test -race` on service, handler, middleware, and scheduler packages to catch data races. It runs `golangci-lint` with 11 linters (errcheck, govet, staticcheck, unused, gosimple, ineffassign, typecheck, gocritic, gosec, bodyclose, noctx) configured in `.golangci.yml`. It runs `govulncheck ./...` to scan dependencies for known CVEs. Coverage thresholds are enforced per-layer: service 60%, handler 60%, domain 40%, middleware 50%. These thresholds act as regression floors — they can only go up. Connector tests are included via `./internal/connector/issuer/...` and `./internal/connector/target/...` (covers Local CA, ACME, step-ca, NGINX, Apache, HAProxy, Traefik, and Caddy packages with unit tests for certificate signing logic, DNS solver, issuer validation, and deployment flows). The Frontend job runs `npx vitest run` between the TypeScript check and production build steps.
|
||||
|
||||
**Connector tests** (`internal/connector/`) — 57 test functions covering issuer, target, and notifier connectors. The Local CA connector has tests for self-signed and sub-CA modes (RSA, ECDSA, config validation, non-CA cert rejection). The ACME DNS solver has 10 tests for script-based DNS-01 and DNS-PERSIST-01 challenges (6 DNS-01 tests + 4 DNS-PERSIST-01 tests covering `PresentPersist` success, no-script error, script failure, and wildcard domain handling). The step-ca connector has tests with a mock HTTP server for issuance, renewal, revocation, and error paths. The OpenSSL/Custom CA connector has 14 tests covering config validation, issuance success/failure/timeout, renewal, revocation, and CRL generation. The NGINX target connector has 13 tests covering config validation, certificate deployment (file writing, permissions, validate/reload commands), and deployment validation. Apache httpd and HAProxy connectors each have 3 tests covering config validation, deployment, and validation flows. Traefik and Caddy connectors have tests covering file-based deployment and (for Caddy) dual-mode API/file configuration. Notifier connector tests span 20 tests across Slack (5), Teams (4), PagerDuty (6), and OpsGenie (5) — verifying channel identity, payload formatting, HTTP error handling, connection failures, auth headers, and configuration defaults.
|
||||
|
||||
**Scheduler tests** (`internal/scheduler/scheduler_test.go`) — Tests for idempotency guards (`sync/atomic.Bool` CompareAndSwap prevents concurrent loop ticks), `WaitForCompletion` success and timeout paths, and multi-loop idempotency.
|
||||
|
||||
**Fuzz tests** (`internal/validation/command_fuzz_test.go`, `internal/domain/revocation_fuzz_test.go`) — Go native fuzz tests (`testing.F`, run with `go test -fuzz`) for command validation functions and revocation domain parsing. These exercise `ValidateShellCommand`, `ValidateDomainName`, and `ValidateACMEToken` with random inputs to discover edge cases.
|
||||
|
||||
**What's not tested and why:** Postgres repository implementations (`internal/repository/postgres/`) require a real database and are tested only through integration tests, not unit tests — `testcontainers-go` scaffolding for isolated PostgreSQL instances is planned. Target connectors for F5 BIG-IP and IIS are interface stubs (implementation planned for V3). The ACME connector requires a real ACME server (tested manually against Let's Encrypt staging). These are all candidates for future expansion as the test infrastructure matures.
|
||||
|
||||
## What's Next
|
||||
|
||||
- [Quick Start](quickstart.md) — Get certctl running locally
|
||||
- [Advanced Demo](demo-advanced.md) — Issue a certificate end-to-end
|
||||
- [Connector Guide](connectors.md) — Build custom connectors
|
||||
- [Compliance Mapping](compliance.md) — SOC 2, PCI-DSS 4.0, and NIST SP 800-57 alignment
|
||||
- [MCP Server Guide](mcp.md) — AI-native access to the API
|
||||
- [OpenAPI Spec](openapi.md) — Full API reference and SDK generation
|
||||
|
||||
@@ -0,0 +1,335 @@
|
||||
# NIST SP 800-57 Key Management Alignment
|
||||
|
||||
NIST SP 800-57 Part 1 Rev 5 (May 2020) is the authoritative US government guidance on cryptographic key management. This document maps certctl's implementation to its recommendations. certctl follows NIST guidance where applicable; this guide documents the alignment and identifies gaps for future roadmap planning.
|
||||
|
||||
## Contents
|
||||
|
||||
1. [Key Generation (Section 6.1)](#key-generation-section-61)
|
||||
2. [Key Storage and Protection (Sections 6.3, 6.4)](#key-storage-and-protection-sections-63-64)
|
||||
3. [Cryptoperiods (Section 5.3, Table 1)](#cryptoperiods-section-53-table-1)
|
||||
4. [Key States and Transitions (Section 5.2)](#key-states-and-transitions-section-52)
|
||||
5. [Algorithm Recommendations (Section 5.1, SP 800-131A)](#algorithm-recommendations-section-51-sp-800-131a)
|
||||
6. [Key Distribution and Transport (Section 6.2)](#key-distribution-and-transport-section-62)
|
||||
7. [Revocation and Compromise (NIST SP 800-57 Part 3)](#revocation-and-compromise-nist-sp-800-57-part-3)
|
||||
8. [Alignment Summary Table](#alignment-summary-table)
|
||||
9. [Gaps and Remediation Roadmap](#gaps-and-remediation-roadmap)
|
||||
- [V2 (Current)](#v2-current)
|
||||
- [V3 (Planned: 2026)](#v3-planned-2026)
|
||||
- [V5 (Planned: 2027+)](#v5-planned-2027)
|
||||
- [Post-Quantum (2027+)](#post-quantum-2027)
|
||||
10. [References](#references)
|
||||
11. [Questions or Corrections?](#questions-or-corrections)
|
||||
|
||||
## Key Generation (Section 6.1)
|
||||
|
||||
certctl generates certificate keys on agent infrastructure using Go's `crypto/rand` for entropy, backed by `/dev/urandom` on Linux and `CryptGenRandom` on Windows. Key generation happens as follows:
|
||||
|
||||
**Agent-Side Key Generation (Production Default)**
|
||||
- Agents generate ECDSA P-256 key pairs per certificate using `crypto/ecdsa` + `crypto/elliptic` (Go stdlib)
|
||||
- Key generation triggered by `AwaitingCSR` job state in renewal/issuance workflows
|
||||
- Agent creates Certificate Signing Request (CSR) with `x509.CreateCertificateRequest`, signed with the agent's private key
|
||||
- Only the CSR crosses the network to the control plane; private key material never leaves the agent
|
||||
- Configuration: `CERTCTL_KEYGEN_MODE=agent` (default, production)
|
||||
|
||||
**Server-Side Key Generation (Demo Only)**
|
||||
- Available for development and testing via `CERTCTL_KEYGEN_MODE=server`
|
||||
- Explicitly logged as a warning at startup: "server-side key generation enabled (CERTCTL_KEYGEN_MODE=server) — private keys touch control plane, demo only"
|
||||
- Docker Compose demo uses server mode for backward compatibility
|
||||
- Not recommended for production; agent mode is the secure default
|
||||
|
||||
**Entropy Source**
|
||||
- `crypto/rand` provides cryptographically secure random bytes
|
||||
- On Linux: backed by `/dev/urandom` via `getrandom()` syscall
|
||||
- On Windows: backed by `CryptGenRandom()` (now `BCryptGenRandom()`)
|
||||
- Meets NIST SP 800-90B requirements for entropy generation
|
||||
|
||||
## Key Storage and Protection (Sections 6.3, 6.4)
|
||||
|
||||
certctl implements tiered key storage with different protection profiles based on key purpose.
|
||||
|
||||
**Agent Private Keys**
|
||||
- Stored on agent filesystem at `CERTCTL_KEY_DIR` (default: `/var/lib/certctl/keys`)
|
||||
- File permissions: 0600 (read/write by agent process only, no world/group access)
|
||||
- One PEM file per certificate, organized by certificate ID
|
||||
- Accessible only to the agent process; isolated from other processes
|
||||
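- For container deployments: mount the key directory as a dedicated Docker volume (e.g., `-v certctl-keys:/var/lib/certctl/keys`) and keep the key files at mode 0600 inside it; the `-v` flag accepts mount options such as `ro`, not file modes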
|
||||
|
||||
**Issuing CA Keys (Local CA Connector)**
|
||||
- Loaded from disk at server startup via `CERTCTL_CA_CERT_PATH` and `CERTCTL_CA_KEY_PATH` env vars
|
||||
- Supports RSA (PKCS#1, PKCS#8) and ECDSA (SEC1, PKCS#8) key formats
|
||||
- Validates certificate constraints before use:
|
||||
- `IsCA=true` flag present
|
||||
- `KeyUsageCertSign` extension set
|
||||
- Valid certificate chain (for sub-CA mode)
|
||||
- Keys held in memory during server runtime (no on-disk caching after load)
|
||||
- Cleared from memory only on server shutdown
|
||||
|
||||
**Sub-CA Mode (Enterprise Integration)**
|
||||
- CA certificate is issued (signed) by the upstream enterprise root (e.g., Active Directory Certificate Services); the matching CA private key is supplied alongside it
|
||||
- Certctl acts as subordinate CA, inheriting issuer DN from upstream CA
|
||||
- All issued certificates chain to enterprise trust anchor
|
||||
- CA key protection inherits upstream root's key management practices
|
||||
- Configured via: `CERTCTL_CA_CERT_PATH=/path/to/ca.crt` and `CERTCTL_CA_KEY_PATH=/path/to/ca.key`
|
||||
|
||||
**NIST Gap: HSM Storage**
|
||||
NIST SP 800-57 Part 1 recommends Hardware Security Module (HSM) storage for high-value keys (CA signing keys). certctl V2 uses filesystem storage on the server. HSM support is planned for V5 roadmap, enabling integration with:
|
||||
- AWS CloudHSM
|
||||
- Azure Dedicated HSM
|
||||
- Thales Luna, Gemalto SafeNet, YubiHSM (on-premises)
|
||||
- PKCS#11-compatible devices
|
||||
|
||||
## Cryptoperiods (Section 5.3, Table 1)
|
||||
|
||||
NIST recommends cryptoperiods (key validity durations) based on key type and security requirements. certctl enforces cryptoperiods through certificate profiles and renewal policies.
|
||||
|
||||
**Certificate Profile Enforcement**
|
||||
- Certificate profiles (M11a) define `max_ttl` constraint per enrollment profile
|
||||
- All certificates issued through a profile cannot exceed the profile's max_ttl
|
||||
- Profile configuration example:
|
||||
```json
|
||||
{
|
||||
"id": "prof-web-prod",
|
||||
"name": "Production Web Certs",
|
||||
"max_ttl_seconds": 31536000, // 1 year max
|
||||
"allowed_key_algorithms": ["ECDSA_P256"],
|
||||
"required_sans": ["example.com"]
|
||||
}
|
||||
```
|
||||
|
||||
**Renewal Thresholds**
|
||||
- Renewal policies with configurable `alert_thresholds_days`: `[30, 14, 7, 0]` (days before expiry)
|
||||
- Background scheduler checks renewal eligibility every 1 hour
|
||||
- Certificates transitioned to `Expiring` status at 30 days, `Expired` at 0 days
|
||||
- Renewal workflow can be triggered manually or automatically
|
||||
|
||||
**NIST Cryptoperiod Recommendations vs certctl Implementation**
|
||||
|
||||
| Key Type | NIST Recommendation | certctl Implementation |
|
||||
|----------|---------------------|------------------------|
|
||||
| CA signing key | 3–10 years | Configured via CA certificate not-after date; inheritable from upstream CA in sub-CA mode |
|
||||
| End-entity web server cert | 1–3 years (trending shorter) | Profile `max_ttl` configurable; ACME issuer typically 90 days; SC-081v3 mandating 47 days by 2029 |
|
||||
| Code signing cert | 2–8 years | Profile enforcement via `max_ttl`; not primary certctl use case |
|
||||
| Short-lived credentials | < 1 hour recommended | Profile TTL < 1 hour; exempt from CRL/OCSP (expiry is sufficient revocation); auto-expiry on scheduler tick |
|
||||
| OCSP signing key | 1–2 years | Embedded OCSP responder uses issuing CA key (same period as issuer) or delegated signing cert |
|
||||
| TLS/SSL interoperability cert | 1–2 years | Trending 1 year or less; certctl's ACME/sub-CA/step-ca issuers all support short periods |
|
||||
|
||||
## Key States and Transitions (Section 5.2)
|
||||
|
||||
NIST defines lifecycle states for keys: pre-activation, active, suspended, deactivated, compromised, and destroyed. certctl maps these to certificate and job states:
|
||||
|
||||
| NIST Key State | certctl Equivalent | Implementation |
|
||||
|---|---|---|
|
||||
| **Pre-activation** | `Pending` job state / `AwaitingCSR` | Job created but key not yet generated; awaiting agent CSR submission (agent-mode) or server keygen (demo mode) |
|
||||
| **Active** | Certificate status `Active` | Cert deployed to targets and in use; within validity period (not before < now < not after) |
|
||||
| **Suspended** | Job state `AwaitingApproval` | Interactive approval holds deployment job pending human review; resumes on approval or cancels on rejection |
|
||||
| **Deactivated** | Certificate status `Expired` | Past not-after date; auto-transitioned by scheduler every 2 minutes; renewal eligible |
|
||||
| **Compromised** | Certificate status `Revoked` | Issued via `POST /api/v1/certificates/{id}/revoke` with RFC 5280 revocation reason |
|
||||
| **Destroyed** | Archived (implementation detail) | Operator responsibility; certctl retains all certs in audit trail for compliance; no destructive deletion API |
|
||||
|
||||
**State Transition Audit Trail**
|
||||
All transitions logged to immutable `audit_events` table with:
|
||||
- Event type (e.g., `certificate_revoked`, `renewal_job_completed`)
|
||||
- Actor (authenticated user or agent ID)
|
||||
- Timestamp (RFC3339)
|
||||
- Resource (certificate ID)
|
||||
- Reason (revocation reason code, approval reason, etc.)
|
||||
- HTTP method, path, status (for API calls)
|
||||
|
||||
Example audit entry for revocation:
|
||||
```json
|
||||
{
|
||||
"id": "ae-2024-0615",
|
||||
"event_type": "certificate_revoked",
|
||||
"actor": "ops-alice@example.com",
|
||||
"timestamp": "2024-06-15T14:23:00Z",
|
||||
"resource_id": "cert-web-prod-2024",
|
||||
"resource_type": "certificate",
|
||||
"description": "Revoked: reason=keyCompromise",
|
||||
"body_hash": "sha256:a1b2c3d..."
|
||||
}
|
||||
```
|
||||
|
||||
## Algorithm Recommendations (Section 5.1, SP 800-131A)
|
||||
|
||||
NIST SP 800-131A Rev 2 (March 2019) categorizes cryptographic algorithms as Approved, Conditionally Approved, or Disallowed. certctl implements only NIST-approved algorithms:
|
||||
|
||||
| Algorithm | NIST Status | certctl Support | Notes |
|
||||
|-----------|-------------|-----------------|-------|
|
||||
| **ECDSA P-256** | Approved (128-bit security strength) | Default for agent-side keygen | Meets NIST curve requirements (FIPS 186-4) |
|
||||
| **ECDSA P-384** | Approved (192-bit security strength) | Supported via profile configuration | Higher security margin; slower than P-256 |
|
||||
| **ECDSA P-521** | Approved (256-bit security strength) | Supported via profile configuration | Rarely needed; overkill for TLS |
|
||||
| **RSA 2048** | Approved minimum (112-bit security, transitioning) | Supported via all issuers | Deprecated path; migrate to 3072+ by 2030 per NIST |
|
||||
| **RSA 3072** | Approved (128-bit security) | Supported via all issuers | Recommended minimum for long-term security |
|
||||
| **RSA 4096** | Approved (above 128-bit security; SP 800-57 equates 192-bit strength with RSA 7680) | Supported via all issuers | Supported but slower; overkill for most TLS |
|
||||
| **SHA-256** | Approved | Used throughout | CSR signing, certificate fingerprints, audit body hashing, CRL/OCSP signing |
|
||||
| **SHA-384** | Approved (192-bit) | Supported where algorithm selection available | Used in some CA signing scenarios |
|
||||
| **SHA-512** | Approved (256-bit) | Supported where algorithm selection available | Rarely needed; SHA-256 suffices for most use cases |
|
||||
| **SHA-1** | Deprecated | Not used in certctl | Browsers reject SHA-1 certs; certctl never generates them |
|
||||
|
||||
**Algorithm Enforcement via Profiles**
|
||||
Certificate profiles enforce allowed key algorithms:
|
||||
```json
|
||||
{
|
||||
"id": "prof-web-prod",
|
||||
"allowed_key_algorithms": ["ECDSA_P256", "ECDSA_P384", "RSA3072"]
|
||||
}
|
||||
```
|
||||
|
||||
**Post-Quantum Cryptography (Tracking)**
|
||||
NIST finalized its first PQC standards (FIPS 203, FIPS 204, FIPS 205) in August 2024:
|
||||
- **ML-KEM** (Kyber): Approved key encapsulation mechanism
|
||||
- **ML-DSA** (Dilithium): Approved digital signature algorithm
|
||||
- **SLH-DSA** (SPHINCS+): Approved stateless hash-based signature scheme
|
||||
|
||||
certctl will track NIST's PQC roadmap and plan integration when hybrid PQC+classical certificate formats reach browser/infrastructure support. Currently, pure PQC certificates are not widely interoperable.
|
||||
|
||||
## Key Distribution and Transport (Section 6.2)
|
||||
|
||||
NIST SP 800-57 Part 1 Section 6.2 addresses secure key distribution to minimize exposure during transit. certctl implements a zero-transmission-of-private-keys model:
|
||||
|
||||
**Private Key Distribution**
|
||||
- Agent-side keygen model: Private keys never leave agent infrastructure
|
||||
- CSR transmitted over HTTPS (TLS 1.2+) with mutual TLS optional
|
||||
- API key authentication via `Authorization: Bearer <api-key>` header
|
||||
- All API calls logged to immutable audit trail
|
||||
|
||||
**Signed Certificate Distribution**
|
||||
- Certificates (public component) distributed via `GET /agents/{id}/work` over HTTPS
|
||||
- Work endpoint enriches deployment jobs with certificate PEM and metadata
|
||||
- Certificate retrieval is idempotent (the same certificate always returns the same PEM bytes)
|
||||
|
||||
**Target Deployment**
|
||||
- Deployment to targets via local filesystem write (NGINX, Apache, HAProxy)
|
||||
- No network transmission of private keys to targets
|
||||
- Agents read local private key from `CERTCTL_KEY_DIR` on deployment
|
||||
- For appliances without agents (F5 BIG-IP, IIS), proxy agent pattern:
|
||||
- Proxy agent runs in same trust zone as appliance
|
||||
- Proxy agent holds target API credentials (iControl, WinRM)
|
||||
- Control plane never communicates with appliance directly
|
||||
- Deployment request includes certificate and proxy agent ID
|
||||
- Proxy agent executes deployment via appliance API
|
||||
|
||||
**Revocation Distribution**
|
||||
- Certificate Revocation List (CRL) via `GET /api/v1/crl/{issuer_id}`
|
||||
- Returns DER-encoded X.509 CRL signed by issuing CA
|
||||
- 24-hour validity period
|
||||
- Includes all revoked serials, reasons, and revocation timestamps
|
||||
- Subject to URL caching; OCSP preferred for real-time revocation
|
||||
- OCSP via `GET /api/v1/ocsp/{issuer_id}/{serial}`
|
||||
- Returns DER-encoded OCSP response (OCSPResponse ASN.1 structure)
|
||||
- Signed by issuing CA (or delegated OCSP signing cert)
|
||||
- Responds with good/revoked/unknown status
|
||||
- Real-time, more bandwidth-efficient than CRL polling
|
||||
|
||||
## Revocation and Compromise (NIST SP 800-57 Part 3)
|
||||
|
||||
NIST SP 800-57 Part 3 covers revocation (Section 2.5) when keys are suspected compromised or no longer needed. certctl implements comprehensive revocation infrastructure:
|
||||
|
||||
**Revocation API**
|
||||
- Endpoint: `POST /api/v1/certificates/{id}/revoke`
|
||||
- Request body:
|
||||
```json
|
||||
{
|
||||
"reason": "keyCompromise",
|
||||
"reason_text": "Private key exposed in log file"
|
||||
}
|
||||
```
|
||||
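- Supports the following 8 RFC 5280 revocation reason codes (RFC 5280 additionally defines `removeFromCRL` and `aACompromise`, which are not listed here):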
|
||||
- `unspecified` — no specific reason provided
|
||||
- `keyCompromise` — private key suspected compromised
|
||||
- `caCompromise` — issuing CA key compromised
|
||||
- `affiliationChanged` — subject org/affiliation changed
|
||||
- `superseded` — cert superseded by newer cert
|
||||
- `cessationOfOperation` — key no longer in use
|
||||
- `certificateHold` — temporary hold (rarely used)
|
||||
- `privilegeWithdrawn` — subject authorization withdrawn
|
||||
|
||||
**Revocation Recording**
|
||||
- Certificate status updated to `Revoked`
|
||||
- Entry recorded in `certificate_revocations` table with:
|
||||
- Certificate serial number
|
||||
- Revocation timestamp
|
||||
- Revocation reason code
|
||||
- Issuer ID
|
||||
- Idempotent (revoking an already-revoked cert is safe; returns 200 OK)
|
||||
|
||||
**Issuer Notification (Best-Effort)**
|
||||
- Control plane calls `issuer.RevokeCertificate(ctx, serial, reason)` on issuing connector
|
||||
- Failure does not block the revocation (async, logged, retried)
|
||||
- Supported issuers:
|
||||
- Local CA: generates new CRL immediately
|
||||
- ACME: submits revocation to ACME server (RFC 8555 Section 7.6)
|
||||
- step-ca: calls `/revoke` API
|
||||
- OpenSSL: executes user-provided revocation script
|
||||
|
||||
**Revocation Notifications**
|
||||
- Notifiers triggered after revocation recorded: Slack, Teams, PagerDuty, OpsGenie, email, webhook
|
||||
- Message includes certificate common name, issuer, reason, actor, timestamp
|
||||
- Delivery is asynchronous and retried on failure
|
||||
|
||||
**CRL and OCSP Distribution**
|
||||
- CRL updated on every revocation (or scheduled refresh for non-issued revocations)
|
||||
- OCSP responder queries revocation table in real-time
|
||||
- Short-lived certificate exemption: certs with TTL < 1 hour skip CRL/OCSP (expiry is sufficient revocation)
|
||||
|
||||
**Revocation Audit Trail**
|
||||
All revocation events logged:
|
||||
- Event type: `certificate_revoked`
|
||||
- Actor: authenticated user or service
|
||||
- Reason code: RFC 5280 enum
|
||||
- Timestamp: RFC3339
|
||||
- Issuer notification status: success or error reason
|
||||
|
||||
## Alignment Summary Table
|
||||
|
||||
| NIST SP 800-57 Area | Status | Coverage | Notes |
|
||||
|---|---|---|---|
|
||||
| **Key Generation** | ✅ Aligned | 100% | Agent-side ECDSA P-256 using crypto/rand; server mode flagged as demo-only |
|
||||
| **Key Storage** | ⚠️ Partially Aligned | 80% | Filesystem with 0600 perms; HSM support planned V5 |
|
||||
| **Cryptoperiods** | ✅ Aligned | 100% | Profile-enforced max_ttl; threshold-based renewal alerting |
|
||||
| **Key States** | ✅ Aligned | 100% | Full lifecycle tracking with immutable audit trail |
|
||||
| **Algorithms** | ✅ Aligned | 100% | NIST-approved algorithms only; post-quantum tracking in progress |
|
||||
| **Key Distribution** | ✅ Aligned | 100% | Private keys never transmitted; CSR/cert over TLS; agent-local deployment |
|
||||
| **Revocation** | ✅ Aligned | 100% | CRL, OCSP, all RFC 5280 reason codes; real-time updates |
|
||||
|
||||
## Gaps and Remediation Roadmap
|
||||
|
||||
### V2 (Current)
|
||||
- [x] Agent-side key generation
|
||||
- [x] Profile-enforced cryptoperiods
|
||||
- [x] CRL and OCSP distribution
|
||||
- [x] RFC 5280 revocation support
|
||||
- [x] Immutable audit trail
|
||||
|
||||
### V3 (Planned: 2026)
|
||||
- Role-based access control (limit revocation/approval to authorized operators)
|
||||
- Bulk revocation by profile/owner/agent (fleet-level revocation policy)
|
||||
|
||||
### V5 (Planned: 2027+)
|
||||
- HSM support for CA key storage
|
||||
- PKCS#11 integration for hardware tokens
|
||||
- FIPS 140-2/3 validated crypto module (BoringCrypto build or external FIPS library)
|
||||
- Key destruction API (explicit secure erasure of agent keys)
|
||||
- Key escrow / recovery mechanism (backup encrypted private keys for disaster recovery)
|
||||
|
||||
### Post-Quantum (2027+)
|
||||
- ML-KEM and ML-DSA support when browser/TLS ecosystem supports hybrid certificates
|
||||
- Migration path documentation (how to transition existing RSA certs to PQC)
|
||||
|
||||
## References
|
||||
|
||||
- NIST SP 800-57 Part 1 Rev 5 (May 2020): https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-57pt1r5.pdf
|
||||
- NIST SP 800-131A Rev 2 (March 2019): https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-131Ar2.pdf
|
||||
- FIPS 186-4 (Digital Signature Standard): https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.186-4.pdf
|
||||
- RFC 5280 (X.509 PKI Certificate and CRL Profile): https://tools.ietf.org/html/rfc5280
|
||||
- RFC 8555 (Automatic Certificate Management Environment): https://tools.ietf.org/html/rfc8555
|
||||
- NIST FIPS 204 (ML-DSA): https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.204.pdf
|
||||
- NIST FIPS 203 (ML-KEM): https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.203.pdf — and NIST FIPS 205 (SLH-DSA): https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.205.pdf
|
||||
|
||||
## Questions or Corrections?
|
||||
|
||||
This document reflects certctl's implementation as of March 2026. For the latest code, refer to:
|
||||
- Key generation: `cmd/agent/main.go` (agent keygen) and `internal/service/renewal.go` (server keygen)
|
||||
- Key storage: `internal/config/config.go` (CERTCTL_KEY_DIR, CERTCTL_CA_CERT_PATH)
|
||||
- Revocation: `internal/service/revocation.go` and `internal/api/handler/certificates.go`
|
||||
- Audit trail: `internal/api/middleware/audit.go`
|
||||
@@ -0,0 +1,819 @@
|
||||
# PCI-DSS 4.0 Compliance Mapping
|
||||
|
||||
This guide maps certctl's existing capabilities to PCI-DSS 4.0 requirements relevant to TLS certificate and cryptographic key management. It is **not a compliance attestation** — a qualified security assessor (QSA) must evaluate your organization's complete control environment. Rather, this document helps you understand which PCI-DSS control objectives certctl supports and where operator responsibility lies.
|
||||
|
||||
Organizations subject to PCI-DSS typically need to demonstrate control over certificate issuance, renewal, rotation, revocation, and key management. Certctl automates the technical controls for certificate lifecycle; compliance depends on how you deploy, monitor, and audit it.
|
||||
|
||||
## Contents
|
||||
|
||||
1. [How to Use This Guide](#how-to-use-this-guide)
|
||||
2. [Requirement 4: Protect Data in Transit](#requirement-4-protect-data-in-transit)
|
||||
- [4.2.1 — Strong Cryptography for Transmission](#421--strong-cryptography-for-transmission)
|
||||
- [4.2.2 — Certificate Inventory and Validation](#422--certificate-inventory-and-validation)
|
||||
3. [Requirement 3: Protect Stored Cardholder Data (Key Management)](#requirement-3-protect-stored-cardholder-data-key-management)
|
||||
- [3.6 — Cryptographic Key Documentation](#36--cryptographic-key-documentation)
|
||||
- [3.7 — Key Lifecycle Procedures](#37--key-lifecycle-procedures)
|
||||
4. [Requirement 8: Identify and Authenticate](#requirement-8-identify-and-authenticate)
|
||||
- [8.3 — Strong Authentication](#83--strong-authentication)
|
||||
- [8.6 — Application Account Management](#86--application-account-management)
|
||||
5. [Requirement 10: Log and Monitor](#requirement-10-log-and-monitor)
|
||||
- [10.2 — Implement Automated Audit Logging](#102--implement-automated-audit-logging)
|
||||
- [10.3 — Protect Audit Trail](#103--protect-audit-trail)
|
||||
- [10.4 — Promptly Review and Address Audit Trail Exceptions](#104--promptly-review-and-address-audit-trail-exceptions)
|
||||
- [10.7 — Retain and Protect Audit Trail History](#107--retain-and-protect-audit-trail-history)
|
||||
6. [Requirement 6: Develop and Maintain Secure Systems and Applications](#requirement-6-develop-and-maintain-secure-systems-and-applications)
|
||||
- [6.3.1 — Security Coding Practices](#631--security-coding-practices)
|
||||
- [6.5.10 — Broken Authentication and Cryptography Prevention](#6510--broken-authentication-and-cryptography-prevention)
|
||||
7. [Requirement 7: Restrict Access by Business Need-to-Know](#requirement-7-restrict-access-by-business-need-to-know)
|
||||
- [7.2 — Implement Access Control](#72--implement-access-control)
|
||||
8. [Evidence Summary Table](#evidence-summary-table)
|
||||
9. [Operator Responsibilities](#operator-responsibilities)
|
||||
10. [V3 Enhancements for PCI-DSS](#v3-enhancements-for-pci-dss)
|
||||
11. [Next Steps for Compliance](#next-steps-for-compliance)
|
||||
12. [Questions?](#questions)
|
||||
|
||||
## How to Use This Guide
|
||||
|
||||
Your QSA will request evidence that your certificate and key management systems meet specific PCI-DSS 4.0 requirements. For each applicable requirement, this guide identifies:
|
||||
|
||||
1. **Which certctl features support the control** — API endpoints, database tables, background processes
|
||||
2. **What evidence you can produce** — audit logs, dashboard metrics, API queries, deployment configs
|
||||
3. **Operator responsibilities** — what you must do outside certctl (policy, monitoring, access control)
|
||||
4. **Status** — Available (v1.0 shipped), Planned (future release), or Operator Responsibility (outside scope)
|
||||
|
||||
---
|
||||
|
||||
## Requirement 4: Protect Data in Transit
|
||||
|
||||
**Objective**: Ensure strong cryptography is used to protect sensitive data during transmission.
|
||||
|
||||
### 4.2.1 — Strong Cryptography for Transmission
|
||||
|
||||
**Requirement**: Use appropriate and current cryptographic algorithms for all TLS and SSH connections protecting card data in transit.
|
||||
|
||||
**certctl Support**:
|
||||
- **Automated TLS certificate lifecycle** — Certctl issues TLS certificates to NGINX, Apache, and HAProxy targets via `POST /api/v1/deployments`. Certificates include RSA 2048-bit and ECDSA P-256 key types (configurable per profile, M11a).
|
||||
- **Control plane TLS enforcement** — All REST API endpoints served exclusively over HTTPS. Agent-to-server heartbeat and work polling use TLS. No plaintext protocol options.
|
||||
- **Issuer connector key negotiation** — ACME v2 (Let's Encrypt, ZeroSSL) validates issuer cryptography. Local CA enforces RSA/ECDSA constraints. step-ca integration ensures Smallstep's cryptography standards.
|
||||
- **Certificate profiles** (M11a) document allowed key types and minimum key sizes per environment (development, production, cardholder-network).
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- Exported certificate inventory via `GET /api/v1/certificates` with key algorithm and size (serialized as JSON).
|
||||
- Issued certificate details showing RSA 2048+ or ECDSA P-256 for all deployed certificates.
|
||||
- Audit trail (`GET /api/v1/audit`) showing issuer connector selection and certificate profile assignment per certificate.
|
||||
- Target deployment logs showing TLS certificate installation on NGINX/Apache/HAProxy.
|
||||
|
||||
**Operator Responsibility**:
|
||||
- Configure certificate profiles for your environments with approved key algorithms.
|
||||
- Audit cipher suite configuration on deployed targets (certctl deploys certs; you verify target TLS settings).
|
||||
- Periodically review `CERTCTL_KEYGEN_MODE` — must be `agent` in production (never `server`).
|
||||
- Monitor issuer connector configuration to ensure issuers meet your cryptography standards.
|
||||
|
||||
**Status**: **Available** (v1.0 shipped)
|
||||
|
||||
---
|
||||
|
||||
### 4.2.2 — Certificate Inventory and Validation
|
||||
|
||||
**Requirement**: Ensure all TLS/SSL certificates used for data transmission are valid, current, and meet required cryptographic standards.
|
||||
|
||||
**certctl Support**:
|
||||
|
||||
- **Managed Certificate Inventory** — Full CRUD API (`/api/v1/certificates`) with sortable, filterable list. Fields: common name, SANs, subject, issuer, serial number, key type/size, not-before/after dates, issuer ID, profile ID, owner, team, status (Active/Expiring/Expired/Revoked).
|
||||
|
||||
- **Filesystem Certificate Discovery** (M18b) — Agents scan configured directories (`CERTCTL_DISCOVERY_DIRS` env var) for existing PEM/DER certificates every 6 hours and on startup. Control plane deduplicates by SHA-256 fingerprint. Three triage statuses: Unmanaged (not managed by certctl), Managed (linked to a managed certificate), Dismissed (operator-marked as out-of-scope).
|
||||
- API endpoints:
|
||||
- `GET /api/v1/discovered-certificates?status=Unmanaged` — find orphaned certs
|
||||
- `GET /api/v1/discovery-summary` — aggregate counts by status
|
||||
- `POST /api/v1/discovered-certificates/{id}/claim` — link to managed certificate
|
||||
- `POST /api/v1/discovered-certificates/{id}/dismiss` — mark out-of-scope
|
||||
|
||||
- **Expiration Threshold Alerting** — Renewal policies support `alert_thresholds_days` (default 30, 14, 7, 0). Background scheduler evaluates daily; certificates transition to Expiring/Expired status automatically. Notifications sent to owners via email/webhook/Slack/Teams/PagerDuty.
|
||||
|
||||
- **Certificate Status Tracking** — Four statuses: Active (deployed, not yet expired), Expiring (within threshold, awaiting renewal), Expired (past not-after date), Revoked (revoked via RFC 5280 revocation API). Dashboard charts show status distribution.
|
||||
|
||||
- **Revocation Infrastructure** (M15a, M15b):
|
||||
- CRL endpoint: `GET /api/v1/crl` (JSON format) or `GET /api/v1/crl/{issuer_id}` (DER X.509 CRL, 24h validity, signed by issuing CA)
|
||||
- OCSP responder: `GET /api/v1/ocsp/{issuer_id}/{serial}` (returns DER-encoded OCSP response: good/revoked/unknown)
|
||||
- Short-lived cert exemption: certs with TTL < 1 hour skip CRL/OCSP (expiry is sufficient revocation)
|
||||
|
||||
- **Stats API** (M14) — Real-time visibility:
|
||||
- `GET /api/v1/stats/summary` — total certs, by status, by issuer
|
||||
- `GET /api/v1/stats/expiration-timeline?days=90` — expiration distribution (weekly buckets)
|
||||
- `GET /api/v1/stats/job-trends?days=30` — renewal/issuance job success rates
|
||||
- `GET /api/v1/certificates` with `?sort=-notAfter&fields=id,commonName,notAfter,status` — sparse, sorted inventory
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- Discovered certificate report: `GET /api/v1/discovered-certificates` JSON export showing all certs on systems, fingerprints, and status.
|
||||
- Managed certificate inventory: `GET /api/v1/certificates` with filters (`?status=Expiring` for upcoming renewals).
|
||||
- Expiration alert configuration: policy JSON showing `alert_thresholds_days` for each environment.
|
||||
- CRL/OCSP availability proof: HTTP GET requests to `/api/v1/crl` and `/api/v1/ocsp/{issuer}/{serial}` with signed responses.
|
||||
- Audit trail for certificate creation/renewal/revocation: `GET /api/v1/audit?type=certificate_issued,certificate_renewed,certificate_revoked`.
|
||||
- Dashboard charts showing expiration timeline, renewal success trends, status distribution.
|
||||
|
||||
**Operator Responsibility**:
|
||||
- Configure `CERTCTL_DISCOVERY_DIRS` on agents to scan all certificate storage locations (e.g., `/etc/nginx/certs`, `/etc/apache2/certs`, `/usr/local/share/ca-certificates`).
|
||||
- Regularly triage discovered certificates: `GET /api/v1/discovered-certificates?status=Unmanaged`, claim or dismiss each.
|
||||
- Set renewal policies for all certificate profiles with appropriate `alert_thresholds_days` (recommendation: 30, 14, 7, 0).
|
||||
- Monitor expiration dashboard and respond to Expiring alerts before certificates expire.
|
||||
- Verify that issued certificates meet your organization's cryptography standards (key type, key size, SANs).
|
||||
- Test CRL/OCSP endpoints periodically to confirm they are reachable and signed correctly.
|
||||
|
||||
**Status**: **Available** (v1.0 shipped, discovery M18b, revocation M15a/M15b)
|
||||
|
||||
---
|
||||
|
||||
## Requirement 3: Protect Stored Cardholder Data (Key Management)
|
||||
|
||||
**Objective**: Render cardholder data unreadable anywhere it is stored; protect cryptographic keys used to encrypt data.
|
||||
|
||||
### 3.6 — Cryptographic Key Documentation
|
||||
|
||||
**Requirement**: Document and implement all key management processes and procedures covering generation, storage, archival, destruction, and change; protect cryptographic keys; and restrict access to keys to the minimum required.
|
||||
|
||||
**certctl Support**:
|
||||
|
||||
- **Certificate Profile Documentation** (M11a) — Named profiles define allowed key types, maximum TTL, and allowed EKUs per use case. Each profile is a documented policy:
|
||||
```json
|
||||
{
|
||||
"id": "p-web-tls",
|
||||
"name": "Web TLS Production",
|
||||
"allowed_key_types": ["RSA_2048", "ECDSA_P256"],
|
||||
"max_ttl_seconds": 31536000,
|
||||
"require_sans": true,
|
||||
"description": "Production TLS certs for external web services"
|
||||
}
|
||||
```
|
||||
|
||||
- **Owner and Team Tracking** (M11b) — Every certificate is assigned an owner (person + email) and optionally a team. This documents key responsibility and escalation paths.
|
||||
|
||||
- **Issuer Connector Specification** — Configuration and API endpoints document which CA and protocol issues each certificate:
|
||||
- `GET /api/v1/issuers/{id}` returns issuer type (local-ca, acme, step-ca, openssl), CA endpoint, authentication method, constraints
|
||||
- Each issuer type has documented key handling (e.g., Local CA loads its CA certificate and key from `CERTCTL_CA_CERT_PATH` and `CERTCTL_CA_KEY_PATH`; step-ca authenticates via a JWK provisioner)
|
||||
|
||||
- **Immutable Audit Trail** (M19) — Every certificate lifecycle event recorded in append-only `audit_events` table:
|
||||
- `certificate_issued` — when certificate created, by whom, issuer type, profile
|
||||
- `certificate_renewed` — when renewed, by whom, issuer
|
||||
- `certificate_revoked` — when revoked, by whom, RFC 5280 reason code
|
||||
- `certificate_deployed` — when deployed to target, by agent, target type
|
||||
- Query: `GET /api/v1/audit?resource_type=certificate&resource_id={cert_id}`
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- Exported certificate profiles: `GET /api/v1/profiles` showing documented key types, max TTLs, constraints per environment.
|
||||
- Certificate-to-owner mapping: `GET /api/v1/certificates` with owner/team fields.
|
||||
- Issuer configuration audit: `GET /api/v1/issuers` showing CA endpoints, key storage paths, auth methods.
|
||||
- Audit trail for a certificate: `GET /api/v1/audit?resource_type=certificate&resource_id={cert_id}` showing complete lifecycle.
|
||||
|
||||
**Operator Responsibility**:
|
||||
- Define and document certificate profiles for each environment and use case.
|
||||
- Assign owner and team to each certificate via API or dashboard.
|
||||
- Document issuer connector configuration (CA endpoint, auth method, key storage location).
|
||||
- Maintain baseline audit trail exports for compliance evidence.
|
||||
- Establish certificate retirement policy (how long to retain audit records after certificate expiry/revocation).
|
||||
|
||||
**Status**: **Available** (v1.0 shipped)
|
||||
|
||||
---
|
||||
|
||||
### 3.7 — Key Lifecycle Procedures
|
||||
|
||||
**Requirement**: Generate, store, protect, access, and destroy cryptographic keys used to encrypt data in transit or at rest.
|
||||
|
||||
This requirement covers key generation, storage, rotation, and destruction. Certctl addresses the certificate/TLS key portion (not symmetric encryption keys used for cardholder data at rest — those are outside scope).
|
||||
|
||||
#### 3.7.1 — Key Generation
|
||||
|
||||
**Requirement**: Generate new keys using strong cryptography.
|
||||
|
||||
**certctl Support**:
|
||||
|
||||
- **Agent-Side Key Generation** (M8) — Production mode (default `CERTCTL_KEYGEN_MODE=agent`):
|
||||
- Agents generate ECDSA P-256 key pairs using `crypto/ecdsa` + `crypto/elliptic.P256()` + `crypto/rand` (cryptographically secure random).
|
||||
- Key generation happens **only on the agent**, never on the control plane.
|
||||
- Agent submits Certificate Signing Request (CSR) with public key to control plane via `POST /api/v1/agents/{id}/csr`.
|
||||
- Issued certificate is returned; private key remains on agent at `CERTCTL_KEY_DIR` (default `/var/lib/certctl/keys`).
|
||||
|
||||
- **Server-Side Fallback** (demo/development only) — `CERTCTL_KEYGEN_MODE=server`:
|
||||
- Control plane generates RSA 2048-bit or ECDSA P-256 keys using `crypto/rand` with `crypto/rsa` or `crypto/ecdsa`.
|
||||
- Server signs CSR and stores the private key in the certificate version record for agent deployment. **Security note:** In server keygen mode, the control plane holds private keys — this is why agent keygen mode is the recommended default for production.
|
||||
- **Must not be used in production.** Explicit warning logged: `server-side key generation enabled (CERTCTL_KEYGEN_MODE=server) — private keys touch control plane, demo only`
|
||||
|
||||
- **Issuer-Specific Key Negotiation**:
|
||||
- **ACME (Let's Encrypt, ZeroSSL)**: The ACME CA controls which key types it accepts; certctl requests ECDSA P-256 by default.
|
||||
- **Local CA**: Supports RSA 2048+, ECDSA (P-256, P-384), PKCS#8 format. Key algorithm inherited from CA cert or specified via profile.
|
||||
- **step-ca**: Smallstep's provisioner defines key type; certctl respects server constraints.
|
||||
- **OpenSSL / Custom CA**: User-provided signing script; key type depends on CA backend.
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- Deployment configuration: `CERTCTL_KEYGEN_MODE=agent` in production (verify in `docker-compose.yml`, Kubernetes manifests, or systemd units).
|
||||
- Agent log excerpt showing key generation (the agent calls Go's `ecdsa.GenerateKey(elliptic.P256(), rand.Reader)`), taken from agent process logs together with the CSR submission timestamp.
|
||||
- Certificate CSR audit: `GET /api/v1/audit?type=certificate_issued` showing CSR fingerprint (SHA-256 hash of CSR PEM).
|
||||
- Renewal job logs showing agent-submitted CSR, not server-generated key.
|
||||
|
||||
**Operator Responsibility**:
|
||||
- **Enforce `CERTCTL_KEYGEN_MODE=agent` in all production deployments.** Never use `server` mode outside demos.
|
||||
- Verify agent hardware is adequately isolated (crypto/rand relies on OS `/dev/urandom` quality).
|
||||
- Monitor `CERTCTL_KEY_DIR` on agents for unauthorized file access (use OS-level file audit if available).
|
||||
- Backup agent key directory (`/var/lib/certctl/keys`) as part of disaster recovery procedure.
|
||||
|
||||
**Status**: **Available** (v1.0 shipped)
|
||||
|
||||
#### 3.7.2 — Key Storage and Access Control
|
||||
|
||||
**Requirement**: Restrict cryptographic key access to the minimum required and protect keys from unauthorized access.
|
||||
|
||||
**certctl Support**:
|
||||
|
||||
- **Agent-Side Key Storage** (M8) — Private keys written to `CERTCTL_KEY_DIR` (default `/var/lib/certctl/keys`):
|
||||
- File permissions: `0600` (readable/writable by agent process owner only).
|
||||
- Filename convention: one file per certificate (e.g., `web-tls-prod.key`, `api-service.key`).
|
||||
- No key data passed over the network between agent and control plane (CSR only).
|
||||
- Keys used locally by agent to sign TLS handshakes, never transmitted to control plane or other systems.
|
||||
|
||||
- **Control Plane Key Storage** — Sensitive credentials managed via environment variables or `.env` files:
|
||||
- CA certificate and private key paths: `CERTCTL_CA_CERT_PATH` + `CERTCTL_CA_KEY_PATH` (for Local CA sub-CA mode).
|
||||
- ACME account key: embedded in ACME issuer config (not stored separately; ACME library handles in memory).
|
||||
- step-ca provisioner key: `CERTCTL_STEPCA_KEY_PATH` env var (path to JWK private key file, loaded into memory during runtime).
|
||||
- API keys: `CERTCTL_API_KEY` (SHA-256 hashed in database, plaintext never stored).
|
||||
- Database credentials: `CERTCTL_DATABASE_URL` in `.env` file, not in source code.
|
||||
|
||||
- **Docker Compose Credential Management** — `.env` file (git-ignored) holds all secrets:
|
||||
```bash
|
||||
CERTCTL_API_KEY=sk-test-...
|
||||
CERTCTL_DATABASE_URL=postgres://user:pass@db:5432/certctl
|
||||
CERTCTL_CA_KEY_PATH=/run/secrets/ca.key
|
||||
```
|
||||
Credentials never in `docker-compose.yml` or Dockerfile.
|
||||
|
||||
- **Kubernetes Secrets** (operator responsibility) — Deploy control plane with:
|
||||
```yaml
|
||||
env:
|
||||
- name: CERTCTL_DATABASE_URL
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: certctl-secrets
|
||||
key: database-url
|
||||
- name: CERTCTL_API_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: certctl-secrets
|
||||
key: api-key
|
||||
```
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- Agent key directory listing (without keys): `ls -la /var/lib/certctl/keys` (shows file count, permissions, timestamps).
|
||||
- Deployment manifest (`docker-compose.yml` or Kubernetes YAML) showing secrets via env var or Secret object (not inline).
|
||||
- `.env` file (do not share contents, only confirm existence and git-ignore status).
|
||||
- API key hash verification: `GET /api/v1/auth/check` with API key, verifying hash matching without plaintext exposure.
|
||||
|
||||
**Operator Responsibility**:
|
||||
- **Store `.env` and credential files outside version control.** Verify `.gitignore` includes `.env`, `*.key`, `ca.key`, etc.
|
||||
- **Restrict file system access to `/var/lib/certctl/keys` on agents** via OS-level permissions (Linux: `chmod 0700`, owned by agent user).
|
||||
- **Limit CA key file read access** — `CERTCTL_CA_KEY_PATH` should be readable only by certctl server process (OS permissions).
|
||||
- **Rotate API keys periodically** (recommendation: annually or when personnel change). certctl does not record an audit trail for API key rotation (outside certctl scope).
|
||||
- **Backup private key stores** (agent key dirs, CA key file) as part of disaster recovery. Encrypt backups at rest.
|
||||
- **Monitor access logs** to `/var/lib/certctl/keys` and CA key file location (use OS audit or file integrity monitoring).
|
||||
|
||||
**Status**: **Available** (v1.0 shipped)
|
||||
|
||||
#### 3.7.3 — Key Rotation
|
||||
|
||||
**Requirement**: Rotate cryptographic keys upon expiration or compromise.
|
||||
|
||||
**certctl Support**:
|
||||
|
||||
- **Automated Certificate Renewal** — Renewal policies trigger certificate renewal automatically:
|
||||
- Background scheduler checks every 60 minutes (configurable via `CERTCTL_SCHEDULER_RENEWAL_CHECK_INTERVAL`).
|
||||
- For each policy, evaluates all managed certificates: if `(not-after - now) <= policy.renewal_threshold_days`, trigger renewal.
|
||||
- Renewal job created in AwaitingCSR state; agent receives work, generates new key pair, submits new CSR.
|
||||
- Issuer connector signs new CSR with new key; old key discarded by agent after new certificate installed.
|
||||
- New certificate deployed to target via deployment job.
|
||||
|
||||
- **Expiration-Based Rotation** — Certificate profiles (M11a) define `max_ttl_seconds` (e.g., 31536000 for 1 year, 3600 for short-lived certs):
|
||||
- Short-lived certificates (TTL < 1 hour) rotate every deployment cycle, providing defense-in-depth (RFC 5280 revocation not needed).
|
||||
- Longer-lived certs (90/180/365 days) are rotated via renewal policy thresholds (with alerts at 30/14/7 days).
|
||||
|
||||
- **Renewal Audit Trail** — Every renewal recorded:
|
||||
- `GET /api/v1/audit?type=certificate_renewed&resource_id={cert_id}` shows each renewal, old serial, new serial, issuer, actor.
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- Renewal policy configuration: `GET /api/v1/policies` showing `renewal_threshold_days` and `alert_thresholds_days`.
|
||||
- Renewal job history: `GET /api/v1/jobs?type=Renewal&status=Completed` with timestamp, before/after serial numbers.
|
||||
- Certificate version history: `GET /api/v1/certificates/{id}/versions` showing all issued versions, dates, issuers.
|
||||
- Audit trail: `GET /api/v1/audit?type=certificate_renewed` for trending and compliance reporting.
|
||||
|
||||
**Operator Responsibility**:
|
||||
- **Define renewal policies for all certificate profiles** with appropriate thresholds (typically 30 days before expiration for 90+ day certs, more aggressive for shorter-lived).
|
||||
- **Monitor renewal job success** via dashboard (M14 charts show renewal success trends) and alerts.
|
||||
- **Investigate renewal failures** (stuck AwaitingCSR, issuer connectivity, deployment errors) promptly to avoid expired certificates.
|
||||
- **Test renewal workflow in staging environment** before rolling out to production.
|
||||
- **Document key rotation schedule** for your organization (renewal policy thresholds, approval workflows if AwaitingApproval).
|
||||
|
||||
**Status**: **Available** (v1.0 shipped)
|
||||
|
||||
#### 3.7.4 — Key Destruction
|
||||
|
||||
**Requirement**: Render cryptographic keys unreadable and unusable when they reach the end of their cryptographic lifetime.
|
||||
|
||||
**certctl Support**:
|
||||
|
||||
- **Certificate Revocation API** (M15a) — `POST /api/v1/certificates/{id}/revoke` with RFC 5280 reason codes:
|
||||
- `unspecified` — general revocation
|
||||
- `keyCompromise` — suspected key compromise
|
||||
- `caCompromise` — CA compromise
|
||||
- `affiliationChanged`, `superseded`, `cessationOfOperation`, `certificateHold`, `privilegeWithdrawn` — lifecycle management
|
||||
- Revocation recorded in `certificate_revocations` table with timestamp and reason.
|
||||
- Issuer notified (best-effort; ACME lacks standard revocation, Local CA skips issuer step).
|
||||
- Revocation notifications sent to owner via email/webhook/Slack/Teams/PagerDuty.
|
||||
|
||||
- **CRL and OCSP Publication** (M15b) — Revoked certificates published in:
|
||||
- CRL: `GET /api/v1/crl` (JSON format) or `GET /api/v1/crl/{issuer_id}` (DER X.509, signed by CA, 24h validity)
|
||||
- OCSP: `GET /api/v1/ocsp/{issuer_id}/{serial}` (returns revoked status for clients validating certificate chain)
|
||||
- Clients checking certificate status via OCSP or CRL see revoked status within 24 hours.
|
||||
|
||||
- **Private Key Destruction on Agent** — When certificate renewed or revoked:
|
||||
- Agent removes old private key file from `CERTCTL_KEY_DIR` when new certificate deployed.
|
||||
- Job status tracking confirms old key is no longer needed.
|
||||
- No audit trail of key deletion (private keys don't pass through control plane).
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- Revocation requests: `GET /api/v1/audit?type=certificate_revoked` with RFC 5280 reason codes.
|
||||
- CRL publication: HTTP GET `/api/v1/crl` and parse JSON to show revoked serial numbers and timestamps.
|
||||
- OCSP responder validation: Query `GET /api/v1/ocsp/{issuer}/{serial}` for a known-revoked cert; response includes `revoked` status.
|
||||
- Audit trail: Certificate status transitions (Active → Revoked) recorded in `audit_events`.
|
||||
|
||||
**Operator Responsibility**:
|
||||
- **Revoke certificates immediately upon key compromise suspicion** using reason code `keyCompromise`.
|
||||
- **Revoke certificates at end of lifecycle** (host decommissioning, service sunset) using reason code `cessationOfOperation`.
|
||||
- **Monitor CRL/OCSP availability** — ensure clients can check revocation status (test with TLS validator tools).
|
||||
- **Establish certificate revocation procedure** (who can revoke, approval workflow if required, documentation).
|
||||
- **Physically destroy backup private keys** (if offline backups are kept) when certificate is revoked or after archival period expires.
|
||||
- **Test revocation workflow in staging** — issue test cert, revoke, verify OCSP/CRL reflects revocation within SLA.
|
||||
|
||||
**Status**: **Available** (v1.0 shipped)
|
||||
|
||||
---
|
||||
|
||||
## Requirement 8: Identify and Authenticate
|
||||
|
||||
**Objective**: Limit access to system components and cardholder data by business need-to-know, and authenticate and manage all access.
|
||||
|
||||
### 8.3 — Strong Authentication
|
||||
|
||||
**Requirement**: Authentication mechanisms must use strong cryptography and render authentication credentials (passwords, passphrases, keys) unreadable during transmission and storage.
|
||||
|
||||
**certctl Support**:
|
||||
|
||||
- **API Key Authentication** — All REST API endpoints require authentication (default):
|
||||
- Bearer token format: `Authorization: Bearer sk-...`
|
||||
- Key stored as SHA-256 hash in database (plaintext never persisted).
|
||||
- Comparison uses `crypto/subtle.ConstantTimeCompare` to prevent timing attacks.
|
||||
- Configuration: `CERTCTL_AUTH_TYPE=api-key` (enforced by default, no opt-out without explicit env var).
|
||||
|
||||
- **GUI Authentication Context** — Web dashboard login flow:
|
||||
- Login page (`/login`) accepts API key entry.
|
||||
- AuthProvider context stores API key in session (localStorage in browser, sent in Authorization header for all API calls).
|
||||
- 401 Unauthorized responses trigger automatic redirect to login.
|
||||
- Logout button clears session.
|
||||
- No session server-side (stateless API).
|
||||
|
||||
- **Credential Transmission** — All API traffic over TLS:
|
||||
- HTTPS enforced at server level (no plaintext HTTP).
|
||||
- API key transmitted in Authorization header (not URL parameter, not cookie).
|
||||
- Browser to server: TLS.
|
||||
- Agent to server: TLS.
|
||||
- No credential logging (API key hash only, never plaintext).
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- API configuration: `CERTCTL_AUTH_TYPE=api-key` in deployment manifest.
|
||||
- Database schema: `api_keys` table showing SHA-256 hash column, not plaintext.
|
||||
- API audit log: `GET /api/v1/audit?action=api_call` showing Bearer token validation (no plaintext keys logged).
|
||||
- TLS certificate on control plane: `openssl s_client -connect {server}:8443` showing valid certificate, TLS 1.2+, strong cipher.
|
||||
- GUI login flow: browser network tab showing Authorization header (token value redacted in compliance report).
|
||||
|
||||
**Operator Responsibility**:
|
||||
- **Issue API keys to users/systems** requiring API access (outside certctl; you maintain key registry).
|
||||
- **Rotate API keys using zero-downtime rotation** — `CERTCTL_AUTH_SECRET` supports comma-separated keys (e.g., `new-key,old-key`). Add the new key, migrate clients, then remove the old key. Recommendation: rotate at least annually, or immediately when personnel change.
|
||||
- **Revoke API keys immediately** when user leaves or token is compromised (set `enabled=false` in API key management — not yet implemented in v1, owner must track manually).
|
||||
- **Enforce strong TLS** on control plane: TLS 1.2+, modern ciphers (configure on reverse proxy or `CERTCTL_TLS_*` env vars if operator-controlled).
|
||||
- **Protect `.env` and credential files** where API key is defined (restrict file system access, no version control).
|
||||
- **Monitor API audit trail** for suspicious access patterns (many 401 errors, access from unexpected IPs, etc.).
|
||||
|
||||
**Status**: **Available** (v1.0 shipped)
|
||||
|
||||
### 8.6 — Application Account Management
|
||||
|
||||
**Requirement**: Users' system access must be restricted to the minimum level of application functions or data needed to perform duties. Application accounts (non-human) must use strong authentication.
|
||||
|
||||
**certctl Support**:
|
||||
|
||||
- **No Application Account Management in v1** — Certctl does not manage user accounts (no user directory, LDAP, OIDC).
|
||||
- All authentication via API key (service-to-service or human user with API key).
|
||||
- No per-user roles or permissions (that's a V3 RBAC feature).
|
||||
- Single API key shared across team or one key per automation script (operator's responsibility to manage).
|
||||
|
||||
- **Credentials Not in Source Code** — Security hardening:
|
||||
- API keys via `CERTCTL_API_KEY` env var (not in `main.go`, Dockerfile, `docker-compose.yml`).
|
||||
- Database credentials via `CERTCTL_DATABASE_URL` in `.env` (git-ignored).
|
||||
- CA private key path via `CERTCTL_CA_CERT_PATH`/`CERTCTL_CA_KEY_PATH` (not inline).
|
||||
|
||||
- **Service Account Isolation** (planned for V3) — Future RBAC will support:
|
||||
- Automation script API keys with scoped permissions (e.g., read-only, renew-only, deploy-only).
|
||||
- OIDC/SSO for human users with fine-grained role assignment (admin, operator, viewer).
|
||||
- Audit trail showing which account/role performed each action.
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- Deployment manifest (Dockerfile, docker-compose.yml) showing no hardcoded API keys, database credentials, or CA key paths.
|
||||
- `.env` file existence (confirm via CI or compliance check, without sharing contents).
|
||||
- `.gitignore` configuration showing `.env`, `*.key`, secrets excluded.
|
||||
- Code review: grep `main.go`, `config.go` for `CERTCTL_API_KEY` — should only see env var reference, not hardcoded values.
|
||||
|
||||
**Operator Responsibility**:
|
||||
- **Manage API keys externally** (issue, rotate, revoke).
|
||||
- **Document who/what has API key access** (automation scripts, team members, third-party integrations).
|
||||
- **Rotate application credentials** (API keys, database passwords) according to your organization's policy.
|
||||
- **Segregate credentials** — one API key per automation script where possible, or use V3 RBAC scoping.
|
||||
- **Monitor application account usage** via audit trail — `GET /api/v1/audit` filtered by action/actor.
|
||||
|
||||
**Status**: **Available in part** (v1.0: credentials out of source code). **Planned V3**: scoped API keys and RBAC.
|
||||
|
||||
---
|
||||
|
||||
## Requirement 10: Log and Monitor
|
||||
|
||||
**Objective**: Log and monitor access to network resources and cardholder data.
|
||||
|
||||
### 10.2 — Implement Automated Audit Logging
|
||||
|
||||
**Requirement**: Automatically log and monitor all access to system components and records containing cardholder data.
|
||||
|
||||
**certctl Support**:
|
||||
|
||||
- **Immutable API Audit Log** (M19) — Middleware captures every API call:
|
||||
- `audit_events` table (append-only, no UPDATE/DELETE):
|
||||
- `method`: HTTP method (GET, POST, PUT, DELETE)
|
||||
- `path`: API endpoint path only, excluding query parameters (e.g., `/api/v1/certificates` — query strings intentionally omitted to prevent sensitive data persistence in the append-only audit trail)
|
||||
- `actor`: authenticated user/service (extracted from API key or context)
|
||||
- `body_hash`: SHA-256 hash of request body (truncated to 16 chars, first 8 chars shown in logs)
|
||||
- `status_code`: HTTP response status (200, 201, 400, 401, 404, 500, etc.)
|
||||
- `latency_ms`: request duration in milliseconds
|
||||
- `timestamp`: RFC 3339 timestamp
|
||||
|
||||
- **Certificate Lifecycle Events** — Higher-level events logged separately:
|
||||
- `certificate_issued` — new certificate created, issuer, profile, profile ID
|
||||
- `certificate_renewed` — certificate renewed, old/new serial, renewal policy
|
||||
- `certificate_revoked` — certificate revoked, RFC 5280 reason code
|
||||
- `certificate_deployed` — certificate deployed to target, agent, target type
|
||||
- `certificate_validated` — validation job result (success/failure reason)
|
||||
|
||||
- **Job Lifecycle Events** — Job status transitions:
|
||||
- `job_created` — renewal/issuance/deployment/validation job created
|
||||
- `job_status_updated` — job state change (Pending → AwaitingCSR → Running → Completed/Failed)
|
||||
|
||||
- **Policy and Configuration Events** — Administrative changes:
|
||||
- `policy_created`, `policy_updated`, `policy_deleted` — renewal policy changes
|
||||
- `profile_created`, `profile_updated`, `profile_deleted` — certificate profile changes
|
||||
- `issuer_created`, `issuer_deleted` — CA connector registration changes
|
||||
|
||||
- **Excluded Paths** — Health/readiness probes not logged to reduce noise:
|
||||
- `GET /health` (excluded by default)
|
||||
- `GET /ready` (excluded by default)
|
||||
- Configurable via `CERTCTL_AUDIT_EXCLUDE_PATHS` env var
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- Audit trail export: `GET /api/v1/audit` or manual database query, showing sample events with timestamp, actor, action, resource.
|
||||
- API call audit log: Query `audit_events` table showing method, path, actor, status code for last 24-48 hours.
|
||||
- Configuration changes: `GET /api/v1/audit?type=policy_created,policy_updated,issuer_created` showing who changed what and when.
|
||||
- Certificate lifecycle: `GET /api/v1/audit?resource_type=certificate&resource_id={cert_id}` showing complete issuance → deployment → renewal/revocation history.
|
||||
|
||||
**Operator Responsibility**:
|
||||
- **Enable audit logging** — it's on by default; verify `CERTCTL_AUDIT_EXCLUDE_PATHS` is not set to exclude certificate-related paths.
|
||||
- **Monitor audit log growth** — `audit_events` table will grow with every API call. Recommend database maintenance (log rotation policy, archival after 90 days, etc.).
|
||||
- **Export and archive audit logs** — periodically `SELECT * FROM audit_events WHERE timestamp > {date}` and export to secure storage (S3, syslog, SIEM).
|
||||
- **Establish audit review procedure** — QSA may request sample of logs; have export process documented.
|
||||
- **Test audit logging** — make API call, verify event appears in audit trail within seconds.
|
||||
|
||||
**Status**: **Available** (M19 shipped)
|
||||
|
||||
### 10.3 — Protect Audit Trail
|
||||
|
||||
**Requirement**: Promptly protect audit trail files from unauthorized modifications.
|
||||
|
||||
**certctl Support**:
|
||||
|
||||
- **Append-Only Database Design** — PostgreSQL triggers and constraints prevent modification:
|
||||
- `audit_events` table has no `UPDATE` or `DELETE` triggers.
|
||||
- Application code never executes UPDATE/DELETE on `audit_events`.
|
||||
- Primary key is `id` (serial); new events always INSERT.
|
||||
|
||||
- **Read-Only API Access** — Audit events accessible only via read (`GET /api/v1/audit`):
|
||||
- No `POST /api/v1/audit` endpoint (no creation from API).
|
||||
- No `PUT /api/v1/audit/{id}` endpoint (no modification).
|
||||
- No `DELETE /api/v1/audit/{id}` endpoint (no deletion).
|
||||
- Only control plane can record events (via internal service layer, not exposed API).
|
||||
|
||||
- **Database Access Control** (operator responsibility) — PostgreSQL user permissions:
|
||||
- `certctl` application user: INSERT, SELECT on `audit_events`.
|
||||
- `certctl_read_only` user (for compliance/audit team): SELECT only on `audit_events`.
|
||||
- `postgres` superuser: restricted to DBA operations, logged separately by PostgreSQL.
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- Database schema: `\d audit_events` showing columns, primary key, no UPDATE/DELETE triggers.
|
||||
- Application code review: `internal/service/audit.go` showing `RecordEvent(...)` as only INSERT operation.
|
||||
- API endpoint audit: grep `internal/api/handler/audit*.go` or `internal/api/router/router.go` — no PUT/DELETE routes for events.
|
||||
- PostgreSQL permissions: `psql -d certctl -c "\dp audit_events"` showing INSERT/SELECT grants only.
|
||||
|
||||
**Operator Responsibility**:
|
||||
- **Restrict database access** — issue read-only PostgreSQL user for compliance/audit team (no write privileges).
|
||||
- **Enable PostgreSQL query logging** — log all database connections and operations for DBA audit trail.
|
||||
- **Backup audit logs** — regularly export `audit_events` to offsite storage (S3, archive tape, syslog aggregator) for long-term retention.
|
||||
- **Monitor database modifications** — alert if any UPDATE/DELETE is attempted on `audit_events` (log-based alerting or PostgreSQL event triggers).
|
||||
- **Encrypt audit exports** — if archiving to external storage, encrypt backups at rest.
|
||||
|
||||
**Status**: **Available** (v1.0 shipped)
|
||||
|
||||
### 10.4 — Promptly Review and Address Audit Trail Exceptions
|
||||
|
||||
**Requirement**: Promptly review audit logs and investigate exceptions/anomalies.
|
||||
|
||||
**certctl Support**:
|
||||
|
||||
- **Dashboard Charts** (M14) — Real-time observability:
|
||||
- **Renewal Success Trends** (30-day line chart) — shows job success rate; spikes in failures warrant investigation.
|
||||
- **Certificate Status Distribution** (donut chart) — shows Expiring/Expired counts; high Expired = missed renewals.
|
||||
- **Expiration Timeline** (90-day weekly heatmap) — shows upcoming expirations; bunching = renewal policy tuning needed.
|
||||
- **Issuance Rate** (30-day bar chart) — shows certificate creation/renewal activity; anomalies (zero issuances for weeks) indicate stopped automation.
|
||||
|
||||
- **Stats API** (M14) — Machine-readable trends:
|
||||
- `GET /api/v1/stats/job-trends?days=30` — renewal/issuance/deployment success/failure counts per day.
|
||||
- `GET /api/v1/stats/summary` — total certs, counts by status.
|
||||
- `GET /api/v1/stats/expiration-timeline?days=90` — expiration buckets for forecasting.
|
||||
|
||||
- **Agent Fleet Overview** (M14) — Agent health visibility:
|
||||
- Pie chart: agent status distribution (healthy, offline, error).
|
||||
- Version breakdown: agent versions in use (identify outdated agents).
|
||||
- Per-agent detail: last heartbeat timestamp, OS/architecture, IP address, recent jobs.
|
||||
|
||||
- **Alert Notifications** (M3, M16a) — Configurable escalation:
|
||||
- Email alerts: certificate approaching expiration, renewal failure, revocation notification.
|
||||
- Webhook: custom HTTP POST to your monitoring system (Slack, Teams, PagerDuty, OpsGenie, custom webhook).
|
||||
- Deduplication: one alert per threshold/certificate per day (avoid alert fatigue).
|
||||
|
||||
- **Audit Trail Filtering and Export** (M13) — Compliance reporting:
|
||||
- `GET /api/v1/audit?actor={user}&timestamp_after={date}` — filter audit log by actor, timestamp, type.
|
||||
- Export CSV/JSON via dashboard: audit page → select filters → "Export CSV" or "Export JSON".
|
||||
- Can export full audit trail for QSA review.
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- Dashboard screenshots: expiration timeline, renewal success trends, status distribution.
|
||||
- Job trend report: `GET /api/v1/stats/job-trends?days=90` showing success/failure rates.
|
||||
- Agent fleet health: `GET /api/v1/agents` showing heartbeat status, version count distribution.
|
||||
- Audit log sample: `GET /api/v1/audit?limit=100` showing certificate issuance/renewal/revocation activity.
|
||||
- Alert configuration: screenshot of renewal policy `alert_thresholds_days` (30, 14, 7, 0) and notifier settings (email, Slack, etc.).
|
||||
|
||||
**Operator Responsibility**:
|
||||
- **Review dashboard charts weekly** — look for anomalies (high Expired count, failure spike, renewal stalled).
|
||||
- **Respond to alerts promptly** — expiration alert = investigate renewal (check job logs, issuer connectivity, agent heartbeat).
|
||||
- **Set alert thresholds appropriately** — default 30/14/7/0 days is a starting point; adjust per your SLA and staffing.
|
||||
- **Maintain alert distribution list** — ensure alerts reach the right on-call engineer/team.
|
||||
- **Archive and review audit logs** — export monthly/quarterly for compliance trending (e.g., "all certificate changes last quarter").
|
||||
- **Test alert delivery** — trigger a test renewal failure or manual revocation, verify alert is sent.
|
||||
|
||||
**Status**: **Available** (v1.0 shipped, M14 observability charts, M19 audit log)
|
||||
|
||||
### 10.7 — Retain and Protect Audit Trail History
|
||||
|
||||
**Requirement**: Retain audit trail history for at least one year and ensure it can be retrieved.
|
||||
|
||||
**certctl Support**:
|
||||
|
||||
- **Immutable Audit Trail** (M19) — `audit_events` table stores all API calls and certificate lifecycle events with timestamps.
|
||||
- **No Automatic Purge** — Certctl does not delete audit events. They remain in PostgreSQL indefinitely.
|
||||
- **Queryable History** — All events accessible via `GET /api/v1/audit` with time range, actor, resource filters.
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- Database retention policy: confirm `audit_events` table has no DELETE triggers or maintenance jobs that purge events.
|
||||
- Sample audit query: `SELECT COUNT(*) FROM audit_events WHERE timestamp > NOW() - INTERVAL '365 days'` showing one year+ of events.
|
||||
- Export procedure: documented process for exporting audit logs to cold storage (S3, archive tape, syslog).
|
||||
|
||||
**Operator Responsibility**:
|
||||
- **Configure PostgreSQL backup/retention** — certctl relies on database backups for audit trail protection.
|
||||
- Backup `audit_events` table daily or per your RPO/RTO.
|
||||
- Retain backups for at least 1 year (configure retention policy on backup system).
|
||||
- Test restore procedure annually.
|
||||
|
||||
- **Export and archive audit logs** — periodically export `SELECT * FROM audit_events WHERE timestamp > {start_date}` to offsite storage.
|
||||
- Recommendation: monthly exports to S3 with versioning enabled.
|
||||
- Encrypt exports at rest.
|
||||
- Retain archives for at least 3 years (adjust per your compliance requirements).
|
||||
|
||||
- **Monitor audit log growth** — `audit_events` table will grow ~1-5 MB/day depending on API call volume.
|
||||
- Estimate: 10,000 API calls/day = ~50 MB/month.
|
||||
- Plan PostgreSQL storage and backup capacity accordingly.
|
||||
|
||||
**Status**: **Available** (v1.0 shipped)
|
||||
|
||||
---
|
||||
|
||||
## Requirement 6: Develop and Maintain Secure Systems and Applications
|
||||
|
||||
**Objective**: Develop and maintain secure systems and applications.
|
||||
|
||||
### 6.3.1 — Security Coding Practices
|
||||
|
||||
**Requirement**: Develop all custom application code in accordance with secure coding practices and include authentication, access control, input validation, and error handling.
|
||||
|
||||
**certctl Support**:
|
||||
|
||||
- **Input Validation** — Centralized validators enforce strong input constraints:
|
||||
- Common name: max 253 chars, DNS-safe characters only, no leading/trailing hyphens.
|
||||
- CSR PEM: must be valid PEM format (regex validation).
|
||||
- Policy type: whitelist enum (Issuance, Renewal, Revocation, etc.).
|
||||
- API key: alphanumeric + hyphens only.
|
||||
- Implemented in `internal/domain/validation.go` and called from all handler layer inputs.
|
||||
|
||||
- **Error Handling** — No sensitive data leakage in error responses:
|
||||
- HTTP 500 errors return generic "Internal Server Error" message, not stack trace.
|
||||
- Database errors logged internally (structured slog), not exposed to client.
|
||||
- 404 responses do not reveal whether a resource exists: the API returns the same "Not Found" whether the resource is missing or the caller is not authorized to access it.
|
||||
|
||||
- **No Hardcoded Credentials** — All secrets via environment variables:
|
||||
- `CERTCTL_API_KEY`, `CERTCTL_DATABASE_URL`, `CERTCTL_CA_KEY_PATH` — env vars only.
|
||||
- Credentials not in `main.go`, Dockerfile, `docker-compose.yml`, or Git history.
|
||||
- `.env` file git-ignored and excluded from version control.
|
||||
|
||||
- **Dependency Management** — Go module pinning (`go.mod`):
|
||||
- All external dependencies pinned to specific versions.
|
||||
- No wildcard versions or `latest` tags.
|
||||
- CI runs `go mod verify` to detect tampering.
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- Code review: `internal/domain/validation.go` showing input validation functions (Common name length, CSR PEM, policy type, etc.).
|
||||
- Error handling audit: `internal/api/handler/certificates.go` showing HTTP error responses (no stack traces).
|
||||
- Credentials in source code check: `grep -r "CERTCTL_API_KEY\|DATABASE_URL\|CA_KEY" cmd/ internal/ | grep -v "\.env"` (should only show env var references, not values; the dot is escaped so that lines such as `os.Getenv(...)` are not filtered out).
|
||||
- `go.mod` review: no wildcard versions, all pinned.
|
||||
- CI workflow: `.github/workflows/ci.yml` showing `go mod verify` step.
|
||||
|
||||
**Operator Responsibility**:
|
||||
- **Review dependency updates** — keep Go version current, update certctl dependencies regularly (security patches).
|
||||
- **Scan container images** — use Trivy, Clair, or similar to scan Docker images for known vulnerabilities.
|
||||
- **Maintain secure coding practices** in any custom issuer/target connectors you deploy (scripts for OpenSSL, BASH/PowerShell for IIS/F5).
|
||||
|
||||
**Status**: **Available** (v1.0 shipped)
|
||||
|
||||
### 6.5.10 — Broken Authentication and Cryptography Prevention
|
||||
|
||||
**Requirement**: Prevent broken authentication and cryptography weaknesses.
|
||||
|
||||
**certctl Support**:
|
||||
|
||||
- **Authentication** — API key with SHA-256 hashing, constant-time comparison (`crypto/subtle.ConstantTimeCompare`).
|
||||
- **Cryptography** — Go's `crypto/*` standard library (no weak ciphers). ECDSA P-256, RSA 2048+.
|
||||
- **TLS** — HTTPS enforced (no plaintext HTTP endpoints).
|
||||
- **No Sessions** — Stateless API (no session cookies, no session fixation risk).
|
||||
|
||||
**Status**: **Available** (v1.0 shipped)
|
||||
|
||||
---
|
||||
|
||||
## Requirement 7: Restrict Access by Business Need-to-Know
|
||||
|
||||
**Objective**: Limit access to system components and cardholder data by business need-to-know and ensure users are authenticated and authorized.
|
||||
|
||||
### 7.2 — Implement Access Control
|
||||
|
||||
**Requirement**: Ensure proper user identity management and implement access controls based on business need-to-know.
|
||||
|
||||
**certctl v1 Support** (limited):
|
||||
- **Certificate Ownership** (M11b) — Each certificate assigned to owner (person + email) and optional team. Ownership is metadata; access control is not enforced at API level.
|
||||
- **Agent Groups** (M11b) — Renewal policies target specific agent groups (OS, architecture, CIDR, version). Groups are used for policy targeting, not user access control.
|
||||
- **Interactive Approval** (M11b) — `AwaitingApproval` job state allows manual approval/rejection of renewals (enforcement of business workflows, not user access control).
|
||||
|
||||
**certctl v3 Support** (planned):
|
||||
- **OIDC/SSO** — Okta, Azure AD, Google integration. Users log in via identity provider.
|
||||
- **Role-Based Access Control (RBAC)** — Three roles: admin (all operations), operator (issue/renew/deploy), viewer (read-only). Roles assigned via OIDC claims or group membership.
|
||||
- **Profile/Owner Gating** — Operator can renew only certificates assigned to their team; viewer cannot modify anything.
|
||||
- **Audit Trail Attribution** — Every action shows which user/role performed it.
|
||||
|
||||
**Evidence You Can Provide** (v1):
|
||||
- Certificate ownership mapping: `GET /api/v1/certificates` showing owner, team fields (metadata only; access not controlled).
|
||||
- Agent group targeting: `GET /api/v1/policies` showing `agent_group_id` field.
|
||||
- Interactive approval workflow: job detail showing `AwaitingApproval` state, approve/reject endpoints in API docs.
|
||||
|
||||
**Operator Responsibility** (v1):
|
||||
- **Manage API key distribution** externally — only issue API keys to authorized users/systems.
|
||||
- **Implement reverse proxy auth** (Nginx, Apache, Okta proxy) in front of certctl to enforce OIDC/LDAP (outside certctl).
|
||||
- **Plan for V3 RBAC** — budget for upgrade when finer-grained access control is needed.
|
||||
|
||||
**Planned** (V3):
|
||||
- Upgrade to certctl Pro with OIDC/RBAC and per-role audit trail.
|
||||
|
||||
**Status**: **Available in part** (v1.0: ownership metadata, agent group targeting). **Planned V3**: OIDC/RBAC enforcement.
|
||||
|
||||
---
|
||||
|
||||
## Evidence Summary Table
|
||||
|
||||
| PCI-DSS Requirement | certctl Feature | API/UI Evidence | Database/Config | Audit Trail | Status |
|
||||
|---|---|---|---|---|---|
|
||||
| **4.2.1** Strong Crypto | TLS cert issuance, ACME/step-ca/Local CA, RSA 2048+/ECDSA P-256 | `GET /api/v1/certificates` (key_type, key_size) | Certificate profiles | `GET /api/v1/audit?type=certificate_issued` | Available |
|
||||
| **4.2.2** Cert Inventory & Validation | Managed cert CRUD, discovery (M18b), expiration alerting, CRL/OCSP | `GET /api/v1/certificates`, `GET /api/v1/discovered-certificates`, `GET /api/v1/crl`, `GET /api/v1/ocsp/{issuer}/{serial}` | `managed_certificates`, `discovered_certificates` tables | `GET /api/v1/audit?type=certificate_*` | Available |
|
||||
| **3.6** Key Documentation | Profiles, owner/team tracking, issuer config, audit trail | `GET /api/v1/profiles`, `GET /api/v1/issuers`, certificate detail with owner/team | Profiles, certificate owner/team fields, issuer config | `GET /api/v1/audit?resource_type=certificate` | Available |
|
||||
| **3.7.1** Key Generation | Agent-side ECDSA P-256, server keygen (demo only) | Agent logs, renewal job detail, CSR audit | `CERTCTL_KEYGEN_MODE=agent` (config), job_type=AwaitingCSR | `GET /api/v1/audit?type=certificate_issued` with CSR hash | Available |
|
||||
| **3.7.2** Key Storage | Agent `/var/lib/certctl/keys` (0600), env var secrets, .env excluded | Deployment manifest (env var refs), agent key dir listing | `.env` file (git-ignored), `CERTCTL_KEY_DIR`, `CERTCTL_CA_KEY_PATH` | No API audit (keys off-platform) | Available |
|
||||
| **3.7.3** Key Rotation | Auto renewal, expiration thresholds, renewal jobs | Dashboard renewal trends, `GET /api/v1/jobs?type=Renewal`, certificate versions | Renewal policies, certificate version history | `GET /api/v1/audit?type=certificate_renewed` | Available |
|
||||
| **3.7.4** Key Destruction | Revocation API (RFC 5280), CRL/OCSP, private key cleanup | `POST /api/v1/certificates/{id}/revoke`, `GET /api/v1/crl`, OCSP endpoint | `certificate_revocations` table, CRL publication | `GET /api/v1/audit?type=certificate_revoked` | Available |
|
||||
| **8.3** Strong Authentication | API key (SHA-256 hash, TLS), GUI login, 401 redirect | GUI login screenshot, API key auth header, TLS cert | API key hash in database | `GET /api/v1/audit` showing API calls | Available |
|
||||
| **8.6** Acct Management | Credentials out of source, .env excluded, env var config | Code review (no hardcoded secrets), `.gitignore` check | Deployment manifests showing env var refs only | No account lifecycle audit (outside scope) | Available in part |
|
||||
| **10.2** Audit Logging | API audit middleware (M19), certificate lifecycle events | `GET /api/v1/audit` with filter/pagination | `audit_events` table (every API call) | Real-time via API | Available |
|
||||
| **10.3** Audit Protection | Append-only table design, read-only API, DB permissions | API endpoint audit (no PUT/DELETE on events), DB schema | `audit_events` table, PostgreSQL GRANT SELECT | Immutable by design | Available |
|
||||
| **10.4** Review & Alert | Dashboard charts, stats API, notifier integrations | Dashboard (renewal trends, status pie, expiration heatmap), `GET /api/v1/stats/*` | Job results, alert config in policies | `GET /api/v1/audit?type=job_*` | Available |
|
||||
| **10.7** Retention | 1+ year in PostgreSQL, export/archive procedures | Database query `SELECT COUNT(*) FROM audit_events WHERE timestamp > NOW() - INTERVAL '1 year'` | `audit_events` table retention (no auto-delete) | Manual export/archival (operator) | Available |
|
||||
| **6.3.1** Secure Coding | Input validation, error handling, no hardcoded secrets, dependency pinning | Code review (validation.go, handlers), error responses | `go.mod` with pinned versions, `.gitignore` | GitHub Actions CI with `go mod verify` | Available |
|
||||
| **7.2** Access Control | Ownership metadata, agent groups, interactive approval | `GET /api/v1/certificates` (owner/team), `GET /api/v1/agent-groups` | Certificate owner/team fields, agent group criteria | User identity from auth context | Available in part (V3: RBAC) |
|
||||
|
||||
---
|
||||
|
||||
## Operator Responsibilities
|
||||
|
||||
The following control objectives are **outside certctl's scope** and must be managed by your organization:
|
||||
|
||||
| Control Objective | Responsibility | Example Actions |
|
||||
|---|---|---|
|
||||
| **Network Segmentation** | Isolate certctl control plane from cardholder network | Place certctl on separate VLAN, firewall rules |
|
||||
| **Physical Security** | Restrict access to servers/databases | Data center access controls, logging |
|
||||
| **Personnel Screening** | Background checks for staff with access | HR/employment verification |
|
||||
| **Access Control Enforcement** | User authentication & authorization outside API | Implement reverse proxy with OIDC (V3: use certctl Pro RBAC) |
|
||||
| **Incident Response** | Procedures for certificate compromise or breach | Document key revocation process, alert escalation |
|
||||
| **Disaster Recovery** | Backup and restore procedures | Database backup schedule, offsite replication |
|
||||
| **Change Management** | Approval process for config/cert changes | CAB meetings, documented procedures |
|
||||
| **Vulnerability Scanning** | ASV scanning, penetration testing, code review | Annual PCI-DSS penetration test |
|
||||
| **Key Backup & Escrow** | Secure offline storage of CA private keys (if required) | Hardware security module (HSM) or encrypted vault |
|
||||
| **Audit Log Retention** | Long-term archival and protection of audit logs | Export to S3/syslog, retain 3+ years |
|
||||
| **QSA Engagement** | Schedule and coordination of compliance assessment | Annual audit with qualified security assessor |
|
||||
|
||||
---
|
||||
|
||||
## V3 Enhancements for PCI-DSS
|
||||
|
||||
Certctl v3 (Pro) adds paid features that strengthen PCI-DSS compliance posture:
|
||||
|
||||
| Feature | PCI-DSS Benefit |
|
||||
|---|---|
|
||||
| **OIDC/SSO Authentication** | Centralized identity management, audit integration with corporate directory |
|
||||
| **Role-Based Access Control (RBAC)** | Least-privilege enforcement: admin, operator, viewer roles with profile/team gating |
|
||||
| **Bulk Revocation by Profile/Owner/Agent** | Rapid incident response (revoke all certs in cardholder network in minutes) |
|
||||
| **NATS Event Bus with JetStream Audit Streaming** | Real-time event streaming to SIEM (Splunk, ELK, Datadog) for centralized audit trail |
|
||||
| **Certificate Health Scores** | Proactive risk identification (composite scoring: expiration proximity, rotation age, key strength) |
|
||||
| **Advanced Search DSL** | Complex audit queries (POST /search with nested AND/OR, regex, field projection) for compliance reporting |
|
||||
| **CT Log Monitoring** | Detect unauthorized certificate issuance (security vulnerability detection) |
|
||||
| **DigiCert Issuer Connector** | Enterprise CA integration for compliance audits |
|
||||
|
||||
---
|
||||
|
||||
## Next Steps for Compliance
|
||||
|
||||
1. **Review this mapping with your QSA** — Confirm which requirements apply to your cardholder data environment.
|
||||
|
||||
2. **Configure certctl for your environment**:
|
||||
- Set `CERTCTL_KEYGEN_MODE=agent` in production.
|
||||
- Define certificate profiles with approved key types.
|
||||
- Configure renewal policies with appropriate thresholds (e.g., 30 days for 90-day certs).
|
||||
- Enable notifier integrations (email, Slack, PagerDuty) for alerts.
|
||||
- Set `CERTCTL_DISCOVERY_DIRS` on agents so that discovery scans all certificate locations.
|
||||
|
||||
3. **Implement operator controls**:
|
||||
- Document certificate management procedures (issuance, renewal, revocation, archival).
|
||||
- Establish API key rotation schedule.
|
||||
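- Set up audit log export and archival (monthly to S3; retain at least 1 year to meet Requirement 10.7, and 3+ years to follow the archive recommendation in that section).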
|
||||
- Configure PostgreSQL backups (daily, 1+ year retention).
|
||||
- Plan incident response (who revokes certs, escalation process, timeline).
|
||||
|
||||
4. **Test compliance readiness**:
|
||||
- Trigger a test renewal and verify CRL/OCSP publication.
|
||||
- Export audit trail and verify it shows expected events.
|
||||
- Test revocation workflow and confirm OCSP reflects status within 24 hours.
|
||||
- Run discovery scan and verify unknown certs are detected and triaged.
|
||||
|
||||
5. **Prepare evidence for QSA**:
|
||||
- API endpoint documentation (OpenAPI spec: `api/openapi.yaml`).
|
||||
- Audit log sample (last 90 days of events).
|
||||
- Configuration export (profiles, policies, issuer/target definitions).
|
||||
- Deployment manifest (showing env var config, no hardcoded secrets).
|
||||
- Test certificates and CRL/OCSP query results.
|
||||
|
||||
6. **Plan for V3** (if RBAC/centralized audit required):
|
||||
- Evaluate certctl Pro for OIDC/SSO and NATS audit streaming.
|
||||
- Assess integration with existing identity provider (Okta, Azure AD, etc.).
|
||||
|
||||
---
|
||||
|
||||
## Questions?
|
||||
|
||||
For additional guidance on certctl features and PCI-DSS mapping:
|
||||
- Review the [Architecture Guide](architecture.md) for system design.
|
||||
- Check [Connectors Documentation](connectors.md) for issuer/target/notifier capabilities.
|
||||
- Run the [Quick Start Guide](quickstart.md) to see features in action.
|
||||
- Consult your QSA for final compliance determination.
|
||||
|
||||
**Last Updated**: March 24, 2026 (certctl v1.0 with M18b discovery and M19 audit logging)
|
||||
@@ -0,0 +1,575 @@
|
||||
# SOC 2 Type II Compliance Mapping
|
||||
|
||||
This guide maps certctl's implemented features to AICPA SOC 2 Trust Service Criteria (TSC). It is **not a SOC 2 certification claim** — rather, it helps security engineers, auditors, and evaluators understand how certctl supports your organization's SOC 2 compliance posture. Use this as evidence input for your own control assessment during SOC 2 audits.
|
||||
|
||||
## How to Use This Guide
|
||||
|
||||
SOC 2 audits require evidence that your infrastructure meets specific Trust Service Criteria. Auditors ask: "Does your certificate management tooling support CC6.1 logical access controls?" This guide answers by mapping certctl's features to specific criteria and pointing to evidence (API endpoints, configuration, audit trail).
|
||||
|
||||
Each section includes:
|
||||
|
||||
- **The TSC requirement** — what the auditor is looking for
|
||||
- **certctl's implementation** — which features address it
|
||||
- **Evidence location** — where to find proof (API endpoint, config variable, source code, audit events)
|
||||
- **V2 vs V3 status** — whether the feature is in the free community edition (V2) or the paid Pro edition (V3)
|
||||
- **Operator responsibility** — aspects your organization must handle outside of certctl
|
||||
|
||||
## Contents
|
||||
|
||||
1. [How to Use This Guide](#how-to-use-this-guide)
|
||||
2. [CC6: Logical and Physical Access Controls](#cc6-logical-and-physical-access-controls)
|
||||
- [CC6.1 — Logical Access Security](#cc61--logical-access-security)
|
||||
- [CC6.2 — Prior to Issuing System Credentials](#cc62--prior-to-issuing-system-credentials)
|
||||
- [CC6.3 — Authentication Policies](#cc63--authentication-policies)
|
||||
- [CC6.7 — Information Transmission Protection](#cc67--information-transmission-protection)
|
||||
3. [CC7: System Operations](#cc7-system-operations)
|
||||
- [CC7.1 — System Monitoring](#cc71--system-monitoring)
|
||||
- [CC7.2 — Anomaly Detection](#cc72--anomaly-detection)
|
||||
- [CC7.3 — Incident Response](#cc73--incident-response)
|
||||
- [CC7.4 — Identify and Develop Risk Mitigation Activities](#cc74--identify-and-develop-risk-mitigation-activities)
|
||||
4. [A1: Availability](#a1-availability)
|
||||
- [A1.1/A1.2 — Availability and Recovery](#a11a12--availability-and-recovery)
|
||||
5. [CC8: Change Management](#cc8-change-management)
|
||||
- [CC8.1 — Change Control](#cc81--change-control)
|
||||
6. [Evidence Summary Table](#evidence-summary-table)
|
||||
7. [What Requires Operator Action](#what-requires-operator-action)
|
||||
8. [V3 Enhancements](#v3-enhancements)
|
||||
9. [Conclusion](#conclusion)
|
||||
|
||||
## CC6: Logical and Physical Access Controls
|
||||
|
||||
### CC6.1 — Logical Access Security
|
||||
|
||||
**Requirement**: The entity restricts logical access to digital and information assets and related facilities by applying user identity authentication, registration, access rights, and usage policies.
|
||||
|
||||
**certctl Implementation** (V2 — Community Edition):
|
||||
|
||||
- **API Key Authentication** — All API calls require a Bearer token (hashed with SHA-256, stored securely, validated with constant-time comparison) or are rejected with 401 Unauthorized. Environment: `CERTCTL_AUTH_TYPE` (default `api-key`; `none` requires explicit opt-in with log warning)
|
||||
- **GUI Authentication** — Web dashboard includes login screen requiring API key entry. Failed auth redirects to login on 401. Auth context persists across page navigation. Logout clears session.
|
||||
- **Configurable CORS** — API restricts cross-origin requests via `CERTCTL_CORS_ORIGINS` allowlist or wildcard. Preflight caching prevents chatty browser auth flows.
|
||||
- **Token Bucket Rate Limiting** — Per-IP rate limiting (configurable via `CERTCTL_RATE_LIMIT_RPS` / `CERTCTL_RATE_LIMIT_BURST`) returns 429 Too Many Requests with Retry-After header. Prevents credential stuffing and brute-force attacks.
|
||||
- **No Password Storage** — certctl does not store user passwords. API keys are the sole authentication mechanism. Your API key generation, distribution, and rotation policies are your responsibility (see "Operator Responsibility" below).
|
||||
- **Zero-Downtime Key Rotation** — `CERTCTL_AUTH_SECRET` accepts comma-separated keys (e.g., `new-key,old-key`). All listed keys are validated with constant-time comparison. Operators can add a new key, migrate clients, then remove the old key — no service restart required for the client migration phase. A single-key warning is logged at startup to encourage rotation configuration.
|
||||
|
||||
**Evidence Locations**:
|
||||
|
||||
- API auth implementation: `internal/api/middleware/auth.go`
|
||||
- Auth check endpoint: `GET /api/v1/auth/check` (validates credentials)
|
||||
- Auth info endpoint: `GET /api/v1/auth/info` (returns current auth mode, served without auth so GUI detects mode)
|
||||
- Rate limiting middleware: `internal/api/middleware/rate_limit.go`
|
||||
- CORS configuration: `cmd/server/main.go`, search for `CERTCTL_CORS_ORIGINS`
|
||||
|
||||
**V3 Enhancement**:
|
||||
|
||||
- **OIDC / SSO Integration** — Optional OIDC providers (Okta, Azure AD, Google) with multi-tenant support. API key fallback for service accounts.
|
||||
- **API Key Scoping** — Per-resource or per-action permissions (e.g., "read certificates from production only" or "issue certs, no revoke")
|
||||
|
||||
**Operator Responsibility**:
|
||||
|
||||
- Generate and securely distribute API keys to authorized users and systems
|
||||
- Rotate API keys regularly (recommend quarterly)
|
||||
- Revoke API keys immediately upon employee departure
|
||||
- Do not commit API keys to version control (use `.env` or secrets management)
|
||||
- Implement your own IP allowlisting at the firewall if needed (certctl enforces CORS at the HTTP layer, not at network layer)
|
||||
|
||||
---
|
||||
|
||||
### CC6.2 — Prior to Issuing System Credentials
|
||||
|
||||
**Requirement**: The entity provisions, modifies, disables, and removes user identities and rights based on an authorization process that considers user responsibility level and changes in those responsibilities.
|
||||
|
||||
**certctl Implementation** (V2):
|
||||
|
||||
- **Ownership Attribution** — Certificates can be assigned to an owner (email + name). Owner information is stored and audited (see CC7.2). Ownership is tracked through the lifecycle (issuance, renewal, deployment, revocation). Ownership reassignment is audited via the immutable audit trail.
|
||||
- **Team Assignment** — Owners can be organized into teams. Certificate policies can route notifications to team email addresses.
|
||||
- **Audit Trail Attribution** — Every API call records the actor (extracted from the API key or auth context). The audit trail is immutable — no retroactive modification of who did what.
|
||||
|
||||
**Evidence Locations**:
|
||||
|
||||
- Ownership domain model: `internal/domain/certificate.go` (OwnerID field)
|
||||
- Owner CRUD API: `GET /api/v1/owners`, `POST /api/v1/owners`, `DELETE /api/v1/owners/{id}`
|
||||
- Team CRUD API: `GET /api/v1/teams`, `POST /api/v1/teams`, `DELETE /api/v1/teams/{id}`
|
||||
- Audit trail API: `GET /api/v1/audit` (actor field in every record)
|
||||
|
||||
**V3 Enhancement**:
|
||||
|
||||
- **RBAC (Role-Based Access Control)** — Predefined roles (Admin, Operator, Viewer) with profile-gated permissions. Administrators manage role assignments.
|
||||
|
||||
**Operator Responsibility**:
|
||||
|
||||
- Map certctl's ownership model to your organizational structure (departments, teams, on-call rotations)
|
||||
- Establish a formal access request and approval process
|
||||
- Reassign certificate ownership and remove departed team members from owner and team records when they leave
|
||||
- Document your access review process (audit trail shows *who* made changes, but you must justify *why*)
|
||||
|
||||
---
|
||||
|
||||
### CC6.3 — Authentication Policies
|
||||
|
||||
**Requirement**: The entity determines, documents, communicates, and enforces authentication policies that support the identification and authentication of authorized internal and external users and the transmission of user credentials.
|
||||
|
||||
**certctl Implementation** (V2):
|
||||
|
||||
- **API Key Policy** — All API access requires an API key or explicit opt-out. Opt-out (`CERTCTL_AUTH_TYPE=none`) logs a warning: "WARNING: Auth disabled (CERTCTL_AUTH_TYPE=none) — this is insecure and only for development". Configuration choice is logged at startup.
|
||||
- **Agent Authentication** — Agents authenticate to the server via API keys (same mechanism as users). Agent credentials are separate from user API keys.
|
||||
- **Private Key Policy** — Agent-side key generation is the default (`CERTCTL_KEYGEN_MODE=agent`). Server-side keygen (`CERTCTL_KEYGEN_MODE=server`) requires explicit configuration and logs a warning: "server-side key generation enabled (CERTCTL_KEYGEN_MODE=server) — private keys touch control plane, demo only".
|
||||
- **Password Policy** — Not applicable; certctl uses API keys exclusively. Password management is delegated to your organization's IAM system if you integrate OIDC/SSO (V3).
|
||||
|
||||
**Evidence Locations**:
|
||||
|
||||
- Auth type configuration: `internal/config/config.go`, `CERTCTL_AUTH_TYPE` env var
|
||||
- Startup logging: `cmd/server/main.go` (logs auth mode at server startup)
|
||||
- Keygen mode configuration: `internal/config/config.go`, `CERTCTL_KEYGEN_MODE` env var
|
||||
- Keygen mode warning: `cmd/server/main.go` and `cmd/agent/main.go`
|
||||
|
||||
**V3 Enhancement**:
|
||||
|
||||
- **OIDC Policy** — Mandatory MFA when OIDC is enabled
|
||||
- **API Key Expiration** — Automatic key rotation policies (e.g., 90-day expiration for user keys, no expiration for long-lived service account keys)
|
||||
|
||||
**Operator Responsibility**:
|
||||
|
||||
- Document your API key generation and distribution policy
|
||||
- Establish a formal change control process for auth configuration changes
|
||||
- Test authentication failures (e.g., expired keys, malformed tokens) in a non-production environment
|
||||
- Integrate certctl authentication into your organization's IAM audit reports (who holds API keys, when they were issued, and who revoked them)
|
||||
|
||||
---
|
||||
|
||||
### CC6.7 — Information Transmission Protection
|
||||
|
||||
**Requirement**: The entity restricts the transmission, movement, and removal of information in a manner that prevents unauthorized disclosure, whether through digital or non-digital means.
|
||||
|
||||
**certctl Implementation** (V2):
|
||||
|
||||
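- **TLS for Control Plane** — API communication is expected to occur over HTTPS (TLS 1.2+), with TLS terminated by a reverse proxy, load balancer, or sidecar deployed in front of certctl (see "Evidence Locations" and "Operator Responsibility" below). Server uses `tls.Dial()` for outbound connections to issuers and targets. Listener configuration: `CERTCTL_SERVER_HOST` (default `127.0.0.1`) + `CERTCTL_SERVER_PORT` (default `8080`; Docker Compose maps to `8443`).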
|
||||
- **Agent-to-Server Communication** — Agents submit CSRs and heartbeats over HTTPS to the server using the same TLS stack.
|
||||
- **Private Key Isolation** — Agents generate ECDSA P-256 private keys locally (`crypto/ecdsa` + `crypto/elliptic`). Private keys are never transmitted to the server — agents submit CSRs only. Private keys are stored on agent filesystem (`CERTCTL_KEY_DIR`, default `/var/lib/certctl/keys`) with 0600 (owner read/write only) permissions. Server-side keygen mode logs a development warning; production must use agent-side keygen.
|
||||
- **Certificate Storage** — Signed certificates are stored in PostgreSQL as PEM text (along with metadata). Certificates are not secrets and may be transmitted plaintext. Private keys are never stored on the control plane in production (agent-side keygen mode).
|
||||
- **Deployment via Target Connectors** — Target connectors write certificates and keys to local filesystem or network appliance APIs. For NGINX/Apache httpd, files are written with restrictive permissions (0600 for keys). For F5/IIS (V3+), credentials are scoped to a proxy agent in the same network zone — the server never holds network appliance credentials.
|
||||
|
||||
**Evidence Locations**:
|
||||
|
||||
- TLS configuration: deploy certctl behind a TLS-terminating reverse proxy (NGINX, HAProxy, or cloud load balancer) or use a TLS sidecar
|
||||
- Agent keygen mode: `cmd/agent/main.go` (ECDSA key generation, filesystem storage with 0600)
|
||||
- Private key handling: `internal/connector/target/nginx/nginx.go` and similar (cert/key file write)
|
||||
- Server-side keygen deprecation: `internal/service/renewal.go` (log warning when enabled)
|
||||
|
||||
**V3 Enhancement**:
|
||||
|
||||
- **Hardware Security Module (HSM) Support** — Optional HSM backend for CA key storage (SubCA and Local CA modes)
|
||||
- **Secrets Rotation** — Encrypted key rotation without server restart
|
||||
|
||||
**Operator Responsibility**:
|
||||
|
||||
- Enable TLS on the control plane in production (deploy behind a TLS-terminating reverse proxy or load balancer with valid certificates)
|
||||
- Enforce TLS on agent-to-server communication via firewall rules (no cleartext HTTP)
|
||||
- Protect agent filesystem key storage with:
|
||||
- File-level permissions (already 0600)
|
||||
- Encrypted filesystems (LUKS, BitLocker, or cloud provider equivalents)
|
||||
- Backup encryption (keys backed up to vault or HSM, never in cleartext backups)
|
||||
- Restrict PostgreSQL access to authorized services only (network isolation, authentication)
|
||||
- For target systems, ensure network traffic from agents to targets is encrypted (TLS, IPsec, or VPN)
|
||||
|
||||
---
|
||||
|
||||
## CC7: System Operations
|
||||
|
||||
### CC7.1 — System Monitoring
|
||||
|
||||
**Requirement**: The entity monitors system components and the operation of those components for anomalies that are indicative of malfunction, including the implementation of monitoring tools, the reporting of results of those monitoring activities, and the identification, documentation, analysis, and resolution of system anomalies.
|
||||
|
||||
**certctl Implementation** (V2):
|
||||
|
||||
- **Health Endpoint** — `GET /health` returns 200 OK with service status. Consumed by Docker health checks and Kubernetes probes.
|
||||
- **Readiness Endpoint** — `GET /ready` returns 200 OK when the database is connected and migrations are applied.
|
||||
- **Background Scheduler Monitoring** — 6 background loops run on a fixed schedule:
|
||||
- Renewal loop: every 1 hour, scans for certificates approaching renewal threshold
|
||||
- Job processor loop: every 30 seconds, picks up pending/waiting jobs and advances their state
|
||||
- Health check loop: every 2 minutes, pings agents to detect downtime
|
||||
- Notification dispatcher loop: every 1 minute, sends queued alerts
|
||||
- Short-lived cert expiry loop: every 30 seconds, marks expired short-lived credentials
|
||||
- Network scanner loop: every 6 hours, scans enabled TLS endpoints for certificate discovery
|
||||
Each loop includes error handling and logs failures via structured slog.
|
||||
- **Metrics Endpoints** — Two formats for monitoring integration:
|
||||
- `GET /api/v1/metrics` — JSON object with gauges, counters, and uptime for custom dashboards
|
||||
- `GET /api/v1/metrics/prometheus` — Prometheus exposition format (`text/plain; version=0.0.4`) for native scraping by Prometheus, Grafana Agent, Datadog, and other OpenMetrics-compatible collectors
|
||||
- **Gauges** — `certctl_certificate_total`, `certctl_certificate_active`, `certctl_certificate_expiring`, `certctl_certificate_expired`, `certctl_certificate_revoked`, `certctl_agent_total`, `certctl_agent_active`, `certctl_job_pending`
|
||||
- **Counters** — `certctl_job_completed_total`, `certctl_job_failed_total`
|
||||
- **Uptime** — `certctl_uptime_seconds` (seconds since server start)
|
||||
All values are point-in-time snapshots computed from database tables.
|
||||
- **Structured Logging** — All scheduler operations, API calls, and connector actions log via `slog` (Go's structured logger). Logs include timestamp, level (DEBUG/INFO/WARN/ERROR), structured fields (e.g., `actor`, `resource_id`, `latency_ms`), and request IDs for tracing.
|
||||
- **Request ID Propagation** — Each HTTP request gets a unique ID (`X-Request-ID` header). The ID is included in all correlated logs, making it easy to trace a single request through multiple service layers.
|
||||
|
||||
**Evidence Locations**:
|
||||
|
||||
- Health/readiness endpoints: `internal/api/handler/health.go`
|
||||
- Background scheduler: `internal/scheduler/scheduler.go` (Start method)
|
||||
- Metrics endpoint: `internal/api/handler/metrics.go`
|
||||
- Stats API endpoints (for detailed time-series): `internal/api/handler/stats.go`
|
||||
- `GET /api/v1/stats/summary` — dashboard KPIs
|
||||
- `GET /api/v1/stats/certificates-by-status` — cert counts by status
|
||||
- `GET /api/v1/stats/expiration-timeline?days=N` — cert expiry distribution
|
||||
- `GET /api/v1/stats/job-trends?days=N` — job completion/failure rates
|
||||
- `GET /api/v1/stats/issuance-rate?days=N` — cert issuance volume
|
||||
- Structured logging middleware: `internal/api/middleware/middleware.go`
|
||||
|
||||
**Operator Responsibility**:
|
||||
|
||||
- Configure log aggregation (e.g., ELK, Datadog, Splunk) to centralize certctl logs
|
||||
- Set up alerting on scheduler loop failures (e.g., "renewal loop failed to complete within 2h")
|
||||
- Configure health check monitoring (e.g., Prometheus blackbox-exporter probes of `/health` and `/ready`, alongside a scrape of `/api/v1/metrics/prometheus`)
|
||||
- Establish thresholds for metrics (e.g., alert if `certctl_job_pending > 50` or `certctl_agent_active < certctl_agent_total`)
|
||||
- Document your log retention policy (audit requirement often mandates 1+ years)
|
||||
- Integrate certctl metrics into your broader observability stack (Grafana dashboards, SLO tracking)
|
||||
|
||||
---
|
||||
|
||||
### CC7.2 — Anomaly Detection
|
||||
|
||||
**Requirement**: The entity monitors system components and the operation of those components for anomalies that are indicative of malfunction, including the implementation of monitoring tools, the reporting of results of those monitoring activities, and the identification, documentation, analysis, and resolution of system anomalies.
|
||||
|
||||
(This criterion overlaps CC7.1 and extends it to specific anomaly response mechanisms.)
|
||||
|
||||
**certctl Implementation** (V2):
|
||||
|
||||
- **Immutable API Audit Trail** (M19) — Every API call is recorded in the `audit_events` table (append-only, no update/delete). Recorded fields: HTTP method, URL path (query parameters intentionally excluded — see security note), actor (user/agent ID), SHA-256 hash of request body (truncated to 16 characters for brevity), response status code, latency in milliseconds. Excluded paths (health, ready) are configurable. Audit records are written asynchronously (non-blocking) and include a timestamp. **Security: Query parameters are excluded from the audit path** because they may contain cursor tokens, API keys, or sensitive filter values; since the audit trail is append-only with no deletion, any sensitive data recorded would persist permanently.
|
||||
- **Audit Trail API** — `GET /api/v1/audit?actor=...&action=...&resource_id=...&created_after=...&created_before=...` allows searching for anomalous patterns (e.g., "who accessed certificate XYZ and when?", "did anyone revoke certs at 2 AM?").
|
||||
- **Expiration Threshold Alerting** — Certificate renewal policies define alert thresholds (days before expiry): default `[30, 14, 7, 0]`. When a certificate approaches a threshold, a notification is enqueued. Deduplication prevents duplicate alerts for the same cert at the same threshold. Auto status transition: cert moves to `Expiring` status at 30 days, `Expired` at 0 days.
|
||||
- **Certificate Status Auto-Transitions** — When a cert is issued, it's `Active`. As expiry approaches, status auto-transitions to `Expiring` (at 30d threshold). At expiry, status becomes `Expired`. Revoked certs move to `Revoked`. These transitions are recorded in the audit trail.
|
||||
- **Notification Routing** — Alerts are sent via configured notifiers (Email, Slack, Teams, PagerDuty, OpsGenie). Alerts for a certificate are routed to its owner's email address (or the team email if there is no individual owner). This allows on-call teams to react to anomalies (e.g., "your production cert will expire in 7 days, request renewal now").
|
||||
- **Deployment Rollback** — If a deployment fails or an older certificate needs to be reactivated, operators can trigger a "rollback" via the GUI. This redeploys a previous certificate version to the target. Rollback actions are audited.
|
||||
|
||||
**Evidence Locations**:
|
||||
|
||||
- Audit middleware: `internal/api/middleware/audit.go`
|
||||
- Audit trail API: `internal/api/handler/audit.go`, `GET /api/v1/audit`
|
||||
- Expiration alerting: `internal/service/renewal.go` (CheckRenewal method)
|
||||
- Notification dispatcher: `internal/scheduler/scheduler.go` (notificationTicker)
|
||||
- Status transitions: `internal/service/certificate.go` (auto status update logic)
|
||||
- Audit trail CLI export: `certctl-cli audit export --format csv` / `--format json`
|
||||
|
||||
**V3 Enhancement**:
|
||||
|
||||
- **SIEM Export** — Real-time audit event streaming to SIEM systems (via NATS event bus with JetStream sink)
|
||||
- **Anomaly Rules Engine** — Configurable rules (e.g., "alert if certificate revoked by non-admin", "alert if >10 certs issued in < 1 hour")
|
||||
|
||||
**Operator Responsibility**:
|
||||
|
||||
- Integrate audit trail into your SIEM / log analysis platform
|
||||
- Define alerting rules and thresholds for anomalies (e.g., "revocation of critical cert", "mass issuance")
|
||||
- Establish a formal incident response workflow (audit trail shows *what* happened; you must decide *what to do* about it)
|
||||
- Regularly review audit logs (e.g., monthly compliance audit of who accessed what)
|
||||
- Configure email/Slack/Teams integration so on-call teams are notified of cert expirations immediately
|
||||
- Encrypt audit trail backups (ACID guarantees don't prevent theft of database backups)
|
||||
|
||||
---
|
||||
|
||||
### CC7.3 — Incident Response
|
||||
|
||||
**Requirement**: The entity detects, investigates, and responds to incidents by executing a defined incident response and management process that includes preparation, detection and analysis, containment, eradication, recovery, and post-incident activities.
|
||||
|
||||
**certctl Implementation** (V2):
|
||||
|
||||
- **Revocation API** — `POST /api/v1/certificates/{id}/revoke` with RFC 5280 reason codes:
|
||||
- `unspecified` — catch-all
|
||||
- `keyCompromise` — private key was exposed
|
||||
- `caCompromise` — CA itself was compromised (rare)
|
||||
- `affiliationChanged` — certificate no longer applies to the organization
|
||||
- `superseded` — newer cert is in use
|
||||
- `cessationOfOperation` — service is shutting down
|
||||
- `certificateHold` — temporary revocation (the hold can be lifted by reissuing the certificate)
|
||||
- `privilegeWithdrawn` — access rights revoked
|
||||
Revocation is **immediate** (no approval workflow). The certificate is marked `Revoked` in inventory, an audit event is logged, and optional issuer notification is best-effort. All revoked certs are excluded from active deployments.
|
||||
- **CRL Endpoint** — `GET /api/v1/crl` returns a JSON-formatted Certificate Revocation List (serial, reason, timestamp for each revoked cert). `GET /api/v1/crl/{issuer_id}` returns a DER-encoded X.509 CRL signed by the issuing CA (useful for legacy clients that don't support OCSP).
|
||||
- **OCSP Responder** — `GET /api/v1/ocsp/{issuer_id}/{serial}` returns a signed OCSP response indicating whether a cert is good, revoked, or unknown. Clients (browsers, TLS libraries) query this endpoint to verify cert validity in real-time.
|
||||
- **Revocation Notifications** — When a cert is revoked, notifications are sent to:
|
||||
- Certificate owner (email)
|
||||
- Configured webhooks (if you have a SIEM that subscribes)
|
||||
- Slack/Teams channels (if notifiers are configured)
|
||||
- **Short-Lived Cert Exemption** — Certificates with TTL < 1 hour (configured in profile) skip CRL/OCSP publication. Expiry is the revocation mechanism for short-lived certs (e.g., Kubernetes pod certs, session tokens).
|
||||
- **Deployment Rollback** — If a revoked cert is still deployed (shouldn't happen, but race conditions exist), operators can manually redeploy a previous version via the GUI. Rollback is audited.
|
||||
|
||||
**Evidence Locations**:
|
||||
|
||||
- Revocation API: `internal/api/handler/certificates.go`, `POST /api/v1/certificates/{id}/revoke`
|
||||
- Revocation domain model: `internal/domain/revocation.go` (RevocationReason type with RFC 5280 mapping)
|
||||
- CRL generation: `internal/service/certificate.go` (GenerateDERCRL method)
|
||||
- OCSP signing: `internal/service/certificate.go` (GetOCSPResponse method)
|
||||
- Revocation notifications: `internal/service/notification.go` (SendRevocationNotification)
|
||||
- Short-lived exemption: `internal/domain/revocation.go` (IsShortLivedCert check)
|
||||
|
||||
**V3 Enhancement**:
|
||||
|
||||
- **Bulk Revocation** — Revoke all certs issued by a specific profile, owner, or agent in a single API call (useful for large-scale incidents like CA compromise)
|
||||
- **Revocation Automation** — Trigger revocation based on external events (e.g., employee termination, security breach alert from CT Log monitoring)
|
||||
|
||||
**Operator Responsibility**:
|
||||
|
||||
- Establish an incident response policy (e.g., "keyCompromise → immediately deploy a replacement cert + notify CISO")
|
||||
- Ensure CRL/OCSP are accessible to all systems using the certs (e.g., CDN or highly-available endpoints if you host on-premises)
|
||||
- Test revocation workflow in staging (verify that revoked certs are actually blocked by clients)
|
||||
- Document justification for revocation (audit trail records *that* a cert was revoked, but not *why* — you must document it separately)
|
||||
- Integrate revocation notifications into your on-call rotation (don't let revocation alerts get lost)
|
||||
|
||||
---
|
||||
|
||||
### CC7.4 — Identify and Develop Risk Mitigation Activities
|
||||
|
||||
**Requirement**: The entity identifies, develops, and implements risk mitigation activities for risks arising from potential business disruptions.
|
||||
|
||||
**certctl Implementation** (V2):
|
||||
|
||||
- **Renewal Job Tracking** — Renewal jobs track the certificate, target agents, and issuance outcome. Failed renewals are retried (configurable backoff). Job state diagram: Pending → Running → Completed (or Failed). Failed jobs trigger notifications.
|
||||
- **Agent Health Monitoring** — Health check loop (every 2m) pings all agents via heartbeat. If an agent misses 3 consecutive heartbeats, it's marked as `Unhealthy`. Unhealthy agents are excluded from new deployments.
|
||||
- **Job Cancellation** — Operators can cancel pending jobs via `POST /api/v1/jobs/{id}/cancel`. Useful when a renewal is already in progress elsewhere (multi-instance deployments) or when a certificate is being phased out.
|
||||
- **Interactive Approval** — Renewal/issuance jobs can be put in `AwaitingApproval` status. An authorized operator reviews the pending cert and approves or rejects it. Rejection records a reason in the audit trail. This provides a separation of duty between requestor and approver.
|
||||
- **Scheduled Scanning** — Agents scan configured directories for existing certs (M18b discovery). Operators triage discovered certs (claim = "we manage this now", dismiss = "this is unmanaged and we're OK with that"). Triage decisions are audited.
|
||||
|
||||
**Evidence Locations**:
|
||||
|
||||
- Job state machine: `internal/domain/job.go` (JobStatus enum)
|
||||
- Job retry logic: `internal/scheduler/scheduler.go` (jobProcessorTicker)
|
||||
- Agent health check: `internal/scheduler/scheduler.go` (healthCheckTicker)
|
||||
- Job cancellation: `internal/api/handler/jobs.go`, `POST /api/v1/jobs/{id}/cancel`
|
||||
- Approval workflow: `internal/api/handler/jobs.go`, `POST /api/v1/jobs/{id}/approve` / `reject`
|
||||
- Discovery scan results: `internal/api/handler/discovery.go`, `GET /api/v1/discovered-certificates`
|
||||
|
||||
**Operator Responsibility**:
|
||||
|
||||
- Monitor renewal job success rate (are certs being renewed before expiry?)
|
||||
- Set up alerts for unhealthy agents (missing 3+ heartbeats = broken agent, take action)
|
||||
- Establish a formal approval policy (who can approve certs? do they need to involve CISO?)
|
||||
- Test job cancellation and recovery flows in staging
|
||||
- Review discovered certs regularly (are there unmanaged certs that should be managed?)
|
||||
- Document your disaster recovery process (what if control plane database is corrupted?)
|
||||
|
||||
---
|
||||
|
||||
## A1: Availability
|
||||
|
||||
### A1.1/A1.2 — Availability and Recovery
|
||||
|
||||
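**Requirement**: (A1.1) The entity maintains, monitors, and evaluates current processing capacity and use of system components (infrastructure, data, and software) to manage capacity demand and to enable the implementation of additional capacity to help meet its objectives. (A1.2) The entity authorizes, designs, develops or acquires, implements, operates, approves, maintains, and monitors environmental protections, software, data backup processes, and recovery infrastructure to meet its objectives.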
|
||||
|
||||
**certctl Implementation** (V2):
|
||||
|
||||
- **Health Probes** — `/health` and `/ready` endpoints support container orchestration (Docker Compose, Kubernetes, etc.). Docker Compose defines health checks for the server and database. Kubernetes would use liveness/readiness probes pointing to these endpoints.
|
||||
- **Database Migrations (Idempotent)** — PostgreSQL migrations use `IF NOT EXISTS` and `ON CONFLICT ... DO NOTHING` patterns. Migrations can be safely reapplied — no risk of doubling data or dropping tables mid-migration.
|
||||
- **Agent Panic Recovery** — Agent binary includes panic recovery in job execution loops. If an agent crashes during a deployment, the control plane marks the job as failed and can retry on a healthy agent.
|
||||
- **Exponential Backoff** — Agent-to-server communication uses exponential backoff (starting at 1s, capped at 5m) to handle transient network failures. This prevents a thundering-herd effect when the control plane is temporarily down.
|
||||
- **Docker Compose Deployment** — Includes health checks for server and database. Services auto-restart on failure.
|
||||
- **PostgreSQL Connection Pooling** — Server uses `database/sql` with configurable `MaxOpenConns` and `MaxIdleConns` (default 25/5). Prevents connection exhaustion.
|
||||
|
||||
**Evidence Locations**:
|
||||
|
||||
- Health endpoints: `internal/api/handler/health.go`
|
||||
- Database migrations: `migrations/` directory (all use `IF NOT EXISTS`, idempotent patterns)
|
||||
- Agent panic recovery: `cmd/agent/main.go` (defer recover() in job execution)
|
||||
- Exponential backoff: `cmd/agent/main.go` (heartbeat and work poll backoff logic)
|
||||
- Connection pooling: `cmd/server/main.go` (SetMaxOpenConns, SetMaxIdleConns)
|
||||
|
||||
**V3 Enhancement**:
|
||||
|
||||
- **Multi-Region HA** — Control plane federation with etcd consensus (operator can run N replicas)
|
||||
- **PostgreSQL HA** — Replication standby with automatic failover (operator responsibility to configure)
|
||||
|
||||
**Operator Responsibility**:
|
||||
|
||||
- Configure PostgreSQL backups (e.g., WAL archiving, daily full backups). Certctl stores certificates but *also* stores renewal policies, audit trail, deployment history.
|
||||
- Test backup/restore process in staging (otherwise broken backups are only discovered during incidents)
|
||||
- Monitor disk usage (PostgreSQL will fail if `/var` fills up)
|
||||
- Plan capacity (how many certs, agents, jobs can your PostgreSQL handle? Certctl is tested with 10k+ certs, 100+ agents, but your infra may differ)
|
||||
- Set up high-availability PostgreSQL if you need zero-downtime upgrades
|
||||
- Implement network segmentation (only authorized services can reach certctl API and database)
|
||||
|
||||
---
|
||||
|
||||
## CC8: Change Management
|
||||
|
||||
### CC8.1 — Change Control
|
||||
|
||||
**Requirement**: The entity authorizes, designs, develops or acquires, configures, documents, tests, approves, and implements changes to infrastructure, data, software, and procedures to meet its objectives.
|
||||
|
||||
**certctl Implementation** (V2):
|
||||
|
||||
- **Certificate Profiles** — Named profiles define allowed key types, max TTL, required SANs, and permitted EKUs. Changes to profiles are common (e.g., "increase max TTL from 1 year to 3 years"). All profile changes are audited (who changed what, when). Profile updates are versioned.
|
||||
- **Policy Engine** — Renewal policies define alert thresholds and approval workflows. Policy changes (e.g., "lower alert threshold from 30 days to 14 days") are audited. Policies have violation rules (e.g., "flag certs longer than 3 years") — violations are recorded in the audit trail.
|
||||
- **Target Configuration** — When a new target (NGINX server, HAProxy load balancer) is added, it's registered with a name and configuration (JSON). Target deletions require confirmation (to prevent accidental removal). All target changes are audited.
|
||||
- **Immutable Audit Trail** — Every change (profile, policy, target, cert, agent, owner, team, approval, revocation, deployment) is recorded in `audit_events`. Audit records are append-only; no retroactive modification is possible. Encrypting the audit trail at rest is an operator responsibility.
|
||||
- **GitHub Actions CI** — Pull requests must pass:
|
||||
- Go unit tests (`go test ./...`) with coverage gates (service layer ≥30%, handler layer ≥50%)
|
||||
- Go vet (static analysis)
|
||||
- Frontend TypeScript type checking (`tsc`)
|
||||
- Frontend Vitest unit tests
|
||||
- Frontend Vite build (ensures no broken imports)
|
||||
Only after all checks pass can the PR be merged and deployed.
|
||||
|
||||
**Evidence Locations**:
|
||||
|
||||
- Profile CRUD: `internal/api/handler/profiles.go`, `GET /api/v1/profiles` / `POST` / `PUT` / `DELETE`
|
||||
- Policy CRUD: `internal/api/handler/policies.go`
|
||||
- Target CRUD: `internal/api/handler/targets.go`
|
||||
- Audit trail: `internal/api/handler/audit.go`, `GET /api/v1/audit` (records action, actor, resource_id, timestamp)
|
||||
- CI configuration: `.github/workflows/ci.yml` (test, vet, coverage gates, build checks)
|
||||
|
||||
**V3 Enhancement**:
|
||||
|
||||
- **Change Approval Workflow** — Optional approval gate before profile/policy changes go live
|
||||
- **Feature Flags** — Enable/disable new features without redeployment (backward compatibility during rolling upgrades)
|
||||
|
||||
**Operator Responsibility**:
|
||||
|
||||
- Implement formal change control (ticket system, approval, peer review)
|
||||
- Document the business justification for profile/policy changes
|
||||
- Test changes in a non-production environment before deploying to production
|
||||
- Have a rollback plan (can you revert a profile change instantly if it breaks issuance?)
|
||||
- Include certctl configuration changes in your change log (for audits and incident investigations)
|
||||
- Version control your certctl configuration (Docker Compose file, environment variables) so you can track changes
|
||||
|
||||
---
|
||||
|
||||
## Evidence Summary Table
|
||||
|
||||
| SOC 2 Criterion | certctl Feature | Evidence Location | V2 (Free) | V3 (Pro) | Operator Responsibility |
|
||||
|---|---|---|---|---|---|
|
||||
| **CC6.1** Logical Access Security | API Key Authentication (SHA-256 hashed, constant-time comparison) | `internal/api/middleware/auth.go` | ✅ | Enhanced | API key generation, distribution, rotation |
|
||||
| | GUI Login with API Key | `web/src/pages/LoginPage.tsx` | ✅ | Enhanced (OIDC) | N/A |
|
||||
| | CORS Allowlist | `CERTCTL_CORS_ORIGINS` env var | ✅ | ✅ | Configure appropriately |
|
||||
| | Token Bucket Rate Limiting | `internal/api/middleware/rate_limit.go` | ✅ | ✅ | Monitor for brute-force attempts |
|
||||
| **CC6.2** Prior to Issuing System Credentials | Ownership Attribution | `GET /api/v1/owners`, audit trail records owner assignment | ✅ | Enhanced (RBAC) | Map to org structure, remove on departure |
|
||||
| | Team Assignment | `GET /api/v1/teams` | ✅ | ✅ | N/A |
|
||||
| | Actor Attribution in Audit Trail | `GET /api/v1/audit` (actor field) | ✅ | ✅ | Justify all changes via separate documentation |
|
||||
| **CC6.3** Authentication Policies | API Key Enforcement | `CERTCTL_AUTH_TYPE=api-key` (default) | ✅ | Enhanced (OIDC, MFA) | Document policy, test failures, integrate into IAM audit |
|
||||
| | Agent Authentication | Separate API keys for agents | ✅ | ✅ | Rotate agent keys, monitor compromise |
|
||||
| | Agent-Side Key Generation | `CERTCTL_KEYGEN_MODE=agent` (default) | ✅ | ✅ | Protect agent filesystem keys via encryption/backup |
|
||||
| | Private Key Policy | Server-side keygen logs warning, disabled in production | ✅ | ✅ | Never use server-side keygen in production |
|
||||
| **CC6.7** Information Transmission Protection | TLS for Control Plane | Deploy behind TLS-terminating reverse proxy | ✅ | ✅ | Enable TLS in production via reverse proxy |
|
||||
| | Agent-to-Server HTTPS | Agents use HTTPS for all API calls | ✅ | ✅ | Enforce TLS via firewall rules |
|
||||
| | Private Key Isolation | Agent-side keygen (ECDSA P-256), keys stored 0600 on agent FS | ✅ | ✅ | Encrypt agent filesystems, backup securely |
|
||||
| | Pull-Only Deployment | Server never initiates outbound to agents/targets | ✅ | Enhanced (HSM, proxy agents) | Encrypt agent↔target comms, isolate proxy agents |
|
||||
| **CC7.1** System Monitoring | Health Endpoint | `GET /health`, `GET /ready` | ✅ | ✅ | Integrate into monitoring (Prometheus, Datadog) |
|
||||
| | Metrics JSON Endpoint | `GET /api/v1/metrics` (gauges, counters, uptime) | ✅ | ✅ | Set thresholds, configure alerting |
|
||||
| | Stats API (time-series) | `GET /api/v1/stats/*` (summary, status, expiration, jobs, issuance) | ✅ | ✅ | Integrate into dashboards, SLO tracking |
|
||||
| | Structured Logging | `slog` middleware with request IDs | ✅ | ✅ | Aggregate logs to SIEM, define retention policy |
|
||||
| | Background Scheduler | 6 loops (renewal 1h, jobs 30s, health 2m, notifications 1m, short-lived 30s, network scan 6h) | ✅ | ✅ | Alert on scheduler loop failures |
|
||||
| **CC7.2** Anomaly Detection | Immutable API Audit Trail | `internal/api/middleware/audit.go`, `GET /api/v1/audit` | ✅ | Enhanced (SIEM export) | Integrate into SIEM, search for anomalies, archive long-term |
|
||||
| | Expiration Threshold Alerting | Configurable per-policy (default 30/14/7/0 days) | ✅ | ✅ | Configure thresholds, integrate notifications |
|
||||
| | Status Auto-Transitions | Active → Expiring (30d) → Expired (0d) | ✅ | ✅ | Monitor status changes in audit trail |
|
||||
| | Notification Routing | Email, Slack, Teams, PagerDuty, OpsGenie | ✅ | ✅ | Configure notifiers, on-call integration |
|
||||
| | Deployment Rollback | Redeploy previous cert version via GUI | ✅ | ✅ | Audit rollback decisions |
|
||||
| **CC7.3** Incident Response | Revocation API (RFC 5280 reasons) | `POST /api/v1/certificates/{id}/revoke` | ✅ | Enhanced (bulk revocation) | Establish incident response policy |
|
||||
| | CRL Endpoint (JSON + DER) | `GET /api/v1/crl`, `GET /api/v1/crl/{issuer_id}` | ✅ | ✅ | Ensure CRL/OCSP accessible to all clients |
|
||||
| | OCSP Responder | `GET /api/v1/ocsp/{issuer_id}/{serial}` | ✅ | ✅ | Test revocation in staging |
|
||||
| | Revocation Notifications | Email, webhook, Slack/Teams on revocation | ✅ | ✅ | Integrate into on-call, document justification separately |
|
||||
| | Short-Lived Cert Exemption | TTL < 1h skip CRL/OCSP | ✅ | ✅ | Configure profiles appropriately |
|
||||
| **CC7.4** Risk Mitigation | Renewal Job Tracking | Job state machine (Pending → Running → Completed/Failed) | ✅ | ✅ | Monitor renewal success rate |
|
||||
| | Agent Health Monitoring | Health check loop (ping every 2m, mark unhealthy after 3 misses) | ✅ | ✅ | Alert on unhealthy agents, investigate |
|
||||
| | Job Cancellation | `POST /api/v1/jobs/{id}/cancel` | ✅ | ✅ | Test in staging |
|
||||
| | Interactive Approval | AwaitingApproval state, `POST /api/v1/jobs/{id}/approve\|reject` | ✅ | ✅ | Define approval policy, audit decisions |
|
||||
| | Certificate Discovery | Agents scan directories, triage (claim/dismiss) | ✅ | ✅ | Review discovered certs regularly |
|
||||
| **A1.1/A1.2** Availability and Recovery | Health Probes (Docker, Kubernetes) | `/health` and `/ready` endpoints | ✅ | ✅ | Use in container orchestration |
|
||||
| | Idempotent Migrations | `IF NOT EXISTS`, `ON CONFLICT ... DO NOTHING` | ✅ | ✅ | Test migration replay in staging |
|
||||
| | Agent Panic Recovery | Panic recovery in job loops | ✅ | ✅ | Monitor agent crashes in logs |
|
||||
| | Exponential Backoff | Agent heartbeat/work poll backoff (1s → 5m) | ✅ | ✅ | Monitor for control plane downtime |
|
||||
| | PostgreSQL Connection Pooling | MaxOpenConns=25, MaxIdleConns=5 (configurable) | ✅ | ✅ | Monitor connection usage |
|
||||
| **CC8.1** Change Control | Certificate Profiles | CRUD API + GUI, profile changes audited | ✅ | ✅ | Formal change control, test in staging |
|
||||
| | Policy Engine + Violations | CRUD API + GUI, policy changes audited | ✅ | ✅ | Document justification, implement approval workflow |
|
||||
| | Target Registration | CRUD API + GUI, changes audited | ✅ | ✅ | Confirm deletions, version control config |
|
||||
| | Immutable Audit Trail | Append-only `audit_events` table | ✅ | ✅ | Encrypt at rest, archive long-term, no manual edits |
|
||||
| | GitHub Actions CI | Unit tests, vet, coverage gates, build checks | ✅ | ✅ | Review PRs before merge, maintain test quality |
|
||||
|
||||
---
|
||||
|
||||
## What Requires Operator Action
|
||||
|
||||
**certctl is a tool, not a complete compliance solution.** Your organization must handle:
|
||||
|
||||
1. **Physical Security** — Protect the infrastructure (servers, network) running certctl. Certctl can't control who has physical access to your datacenter.
|
||||
|
||||
2. **Personnel Background Checks** — Before granting anyone API key access, conduct background checks per your policy. Certctl records *who* accessed *what*, but doesn't verify that people are trustworthy.
|
||||
|
||||
3. **Formal Incident Response Plan** — Certctl provides incident detection (anomalies in audit trail) and tools for response (revocation, rollback), but you must define *when* to use them and *who* decides.
|
||||
|
||||
4. **Access Review and Removal** — Certctl stores ownership, teams, and API keys. You must:
|
||||
- Regularly review who has access (quarterly or semi-annually)
|
||||
- Immediately revoke API keys for departing employees
|
||||
- Audit that removed access is actually removed (test that old keys fail)
|
||||
|
||||
5. **Log Retention and Archival** — Certctl logs to stdout (Docker) and stores audit events in PostgreSQL. You must:
|
||||
- Ship logs to a long-term archive (SIEM, S3, or equivalent)
|
||||
- Define retention policy (often 1-7 years per industry regulation)
|
||||
- Encrypt archived logs
|
||||
- Test that you can retrieve logs from archive (restoration drills)
|
||||
|
||||
6. **Encryption at Rest** — PostgreSQL data (including audit trail) is stored on disk. You must:
|
||||
- Enable transparent data encryption (TDE) on your database VM
|
||||
- Encrypt container persistent volumes (if using Kubernetes)
|
||||
- Encrypt database backups
|
||||
|
||||
7. **Network Segmentation** — Certctl API and database must be protected by network access controls. You must:
|
||||
- Firewall the control plane (only authorized services can connect)
|
||||
- Use VPN or private networks for agent-to-server communication
|
||||
- Isolate proxy agents (for F5, IIS, etc.) in the same network zone as their targets
|
||||
|
||||
8. **Capacity Planning** — Certctl's performance scales with your PostgreSQL. You must:
|
||||
- Estimate certificate inventory size (10k, 100k, 1M certs?)
|
||||
- Test Certctl with your expected scale in staging
|
||||
- Monitor disk usage, CPU, memory
|
||||
- Plan for growth (add PostgreSQL replicas, increase connection pool, etc.)
|
||||
|
||||
9. **Disaster Recovery** — Certctl data lives in PostgreSQL. You must:
|
||||
- Back up PostgreSQL regularly (daily or hourly, depending on RPO)
|
||||
- Test restore process in staging (otherwise broken backups are only discovered during incidents)
|
||||
- Have a runbook for failover to replica or recovery from backup
|
||||
- Document RTO/RPO targets (how long can cert management be down? how much data can you afford to lose?)
|
||||
|
||||
10. **Integration with Your IAM** — If using OIDC/SSO (V3), you must:
|
||||
- Configure your OIDC provider (Okta, Azure AD, Google)
|
||||
- Map user groups to Certctl roles (Admin, Operator, Viewer)
|
||||
- Manage MFA policy (enforce MFA if required)
|
||||
- Audit user provisioning/deprovisioning
|
||||
|
||||
11. **Documentation and Runbooks** — Certctl documents *what it does* (this guide), but you must document:
|
||||
- Your organization's certificate lifecycle policy (who requests, who approves, who deploys)
|
||||
- How to respond to specific incidents (cert compromise, CA compromise, agent down, renewal failed)
|
||||
- How to operate certctl (day-to-day tasks, escalation procedures)
|
||||
- Contact info for on-call teams
|
||||
|
||||
---
|
||||
|
||||
## V3 Enhancements
|
||||
|
||||
**certctl Pro (V3, paid edition) adds features that significantly strengthen SOC 2 evidence:**
|
||||
|
||||
- **OIDC / SSO Integration** — Integrate with Okta, Azure AD, Google to replace API keys with federated identity. Enables MFA enforcement and centralized access management. Auditors love federated identity (easier to remove access at source).
|
||||
|
||||
- **Role-Based Access Control (RBAC)** — Predefined roles (Admin: full access; Operator: issue/renew/revoke, no policy changes; Viewer: read-only) with profile-gated enforcement. Allows separation of duties (e.g., junior operator can't change global policy).
|
||||
|
||||
- **NATS Event Bus** — Real-time audit streaming to your SIEM. Hybrid model: HTTP for synchronous APIs, NATS for async events (cert.issued, cert.expiring, agent.heartbeat, job.completed). JetStream persistence for replay and durability.
|
||||
|
||||
- **SIEM Export** — Automated export of audit trail to Splunk, ELK, Datadog, etc. (webhooks, syslog, or pull-based APIs). Makes it easy for security teams to hunt for anomalies.
|
||||
|
||||
- **Advanced Search DSL** — `POST /api/v1/search` with tree-based filters (nested AND/OR, regex, field projection). Enables complex compliance queries (e.g., "all certs issued in the last 30 days by team X that are longer than 1 year").
|
||||
|
||||
- **Bulk Revocation** — Revoke all certs issued by a profile, owner, or agent in one operation. Critical for large-scale incidents (e.g., "a team's CA key was compromised, revoke all their certs").
|
||||
|
||||
- **Certificate Health Scores** — Composite risk scoring (e.g., "this cert has no short-lived TTL enforcement, extends past your policy max, and hasn't been renewed in 2 years" → health=30%). Helps prioritize remediation.
|
||||
|
||||
- **Compliance Scoring** — Audit readiness reporting per certificate (e.g., "compliance=95% — missing only a 3-year max-TTL constraint"). Exportable compliance report.
|
||||
|
||||
- **DigiCert Issuer Connector** — OV/EV certificate issuance for public-facing services (web servers, CDNs). Complements Local CA for internal use.
|
||||
|
||||
- **CT Log Monitoring** — Passive detection of unauthorized cert issuance. Monitors public CT logs for certs matching your domains and alerts if unexpected certs appear (e.g., attacker obtained a cert for your domain).
|
||||
|
||||
- **F5 BIG-IP Implementation** — Full target connector with iControl REST API. Agents can deploy certs to F5 load balancers.
|
||||
|
||||
- **IIS Implementation** — Dual-mode: agent-local PowerShell (default) for servers with agents, or proxy agent WinRM (agentless targets). Full Windows Server integration.
|
||||
|
||||
---
|
||||
|
||||
## Conclusion
|
||||
|
||||
certctl provides a strong foundation for SOC 2 compliance with API key authentication, immutable audit logging, automated alerting, and revocation capabilities. However, SOC 2 audits require evidence across your entire infrastructure — certctl is one piece. Use this guide to map certctl features to your audit questionnaire, then work with your auditors to identify gaps that must be filled by your own organizational policies and controls.
|
||||
|
||||
For a deeper SOC 2 discussion or a mock audit against this guide, contact your certctl Pro support team.
|
||||
@@ -0,0 +1,43 @@
|
||||
# Compliance Mapping Guides
|
||||
|
||||
certctl is a certificate lifecycle management tool, not a compliance product. It doesn't make you compliant — your organization, policies, and processes do that. What certctl provides is tooling that supports the technical controls auditors and evaluators look for when assessing certificate and key management practices.
|
||||
|
||||
These guides map certctl's features to three widely referenced compliance frameworks. They're designed for security engineers, IT auditors, and procurement teams evaluating certctl for environments with regulatory requirements.
|
||||
|
||||
## What's Covered
|
||||
|
||||
**[SOC 2 Type II](compliance-soc2.md)** — Maps certctl features to AICPA Trust Service Criteria. Covers logical access controls (CC6), system operations and monitoring (CC7), change management (CC8), and availability (A1). Most relevant for organizations undergoing SOC 2 audits where certificate management is in scope.
|
||||
|
||||
**[PCI-DSS 4.0](compliance-pci-dss.md)** — Maps certctl features to PCI Data Security Standard version 4.0 requirements. Covers data-in-transit protection (Req 4), cryptographic key management (Req 3), authentication (Req 8), audit logging (Req 10), secure development (Req 6), and access control (Req 7). Most relevant for organizations handling cardholder data where TLS certificates protect transmission channels.
|
||||
|
||||
**[NIST SP 800-57](compliance-nist.md)** — Maps certctl's key management practices to NIST Special Publication 800-57 Part 1 Rev 5 (2020). Covers key generation, storage, cryptoperiods, key state lifecycle, algorithm selection, key transport, and revocation. Most relevant for organizations aligning with US federal cryptographic guidance or using NIST as a key management baseline.
|
||||
|
||||
## What These Guides Are Not
|
||||
|
||||
These are mapping guides, not certification claims. certctl is not SOC 2 certified, PCI-DSS validated, or NIST-assessed. The guides document how certctl's technical implementation supports the controls these frameworks require — they do not replace your auditor's assessment, your organization's policies, or your security team's judgment.
|
||||
|
||||
The guides also clearly identify gaps where certctl's current implementation doesn't fully align with a framework's recommendations, features planned for future versions, and areas where operator action is required regardless of what certctl provides.
|
||||
|
||||
## How to Use These Guides
|
||||
|
||||
If you're evaluating certctl for a regulated environment, start with the framework your auditor cares about. Each guide includes an evidence summary table mapping specific compliance criteria to certctl features, API endpoints, and configuration — the kind of specifics your auditor will ask for.
|
||||
|
||||
If you're preparing for an audit and certctl is already deployed, use the "Operator Responsibilities" section of each guide to identify what your organization must manage beyond what certctl provides.
|
||||
|
||||
## Quick Reference
|
||||
|
||||
| Framework | Primary Concern | Key certctl Features |
|
||||
|---|---|---|
|
||||
| SOC 2 Type II | Trust service criteria for SaaS/infrastructure | API audit trail, auth controls, monitoring, change management |
|
||||
| PCI-DSS 4.0 | Cardholder data protection | TLS lifecycle, key management, immutable logging, access control |
|
||||
| NIST SP 800-57 | Cryptographic key management | Agent-side keygen, key isolation, algorithm selection, revocation |
|
||||
|
||||
## certctl Pro (V3) Enhancements
|
||||
|
||||
Several compliance-relevant features are planned for certctl Pro:
|
||||
|
||||
- **OIDC/SSO** — Enterprise identity provider integration (SOC 2 CC6.1, PCI-DSS 8.3)
|
||||
- **RBAC** — Role-based access control with admin/operator/viewer roles (SOC 2 CC6.3, PCI-DSS 7.2)
|
||||
- **NATS Audit Streaming** — Real-time audit event streaming to SIEM systems (SOC 2 CC7.2, PCI-DSS 10.2)
|
||||
- **Bulk Revocation** — Fleet-wide incident response capability (NIST SP 800-57 Section 5.4)
|
||||
- **Health/Compliance Scoring** — Automated compliance posture assessment per certificate
|
||||
@@ -1,6 +1,41 @@
|
||||
# Understanding Certificates: A Beginner's Guide
|
||||
|
||||
If you've never worked with TLS certificates before, this guide will get you up to speed. By the end, you'll understand what certificates are, why they matter, and why managing them at scale is hard enough to need a tool like certctl.
|
||||
If you've never worked with TLS certificates before, this guide will get you up to speed. By the end, you'll understand what certificates are, why they matter, and why the industry's move toward shorter certificate lifespans — down to 47 days by 2029 — makes automated lifecycle management essential.
|
||||
|
||||
## Contents
|
||||
|
||||
1. [What Is a TLS Certificate?](#what-is-a-tls-certificate)
|
||||
2. [Why Do Certificates Expire?](#why-do-certificates-expire)
|
||||
3. [The Cast of Characters](#the-cast-of-characters)
|
||||
- [Certificate Authority (CA)](#certificate-authority-ca)
|
||||
- [ACME Protocol](#acme-protocol)
|
||||
- [EST Protocol (Enrollment over Secure Transport)](#est-protocol-enrollment-over-secure-transport)
|
||||
- [Private Key](#private-key)
|
||||
- [Subject Alternative Names (SANs)](#subject-alternative-names-sans)
|
||||
- [Certificate Chain](#certificate-chain)
|
||||
4. [How certctl Works](#how-certctl-works)
|
||||
- [The Control Plane (Server)](#the-control-plane-server)
|
||||
- [Agents](#agents)
|
||||
- [Deployment Targets](#deployment-targets)
|
||||
5. [The Certificate Lifecycle](#the-certificate-lifecycle)
|
||||
6. [Why Not Just Use Certbot?](#why-not-just-use-certbot)
|
||||
7. [Key Concepts in certctl](#key-concepts-in-certctl)
|
||||
- [Teams and Owners](#teams-and-owners)
|
||||
- [Agent Groups](#agent-groups)
|
||||
- [Certificate Profiles](#certificate-profiles)
|
||||
- [Interactive Renewal Approval](#interactive-renewal-approval)
|
||||
- [Certificate Revocation](#certificate-revocation)
|
||||
- [Short-Lived Certificates](#short-lived-certificates)
|
||||
- [Policies](#policies)
|
||||
- [Jobs](#jobs)
|
||||
- [Audit Trail](#audit-trail)
|
||||
- [Notifications](#notifications)
|
||||
- [CLI](#cli)
|
||||
- [MCP Server (AI Integration)](#mcp-server-ai-integration)
|
||||
- [EST Enrollment (Device Certificates)](#est-enrollment-device-certificates)
|
||||
- [Certificate Discovery](#certificate-discovery)
|
||||
- [Observability](#observability)
|
||||
8. [What's Next](#whats-next)
|
||||
|
||||
## What Is a TLS Certificate?
|
||||
|
||||
@@ -12,11 +47,15 @@ Think of it like a notarized ID badge for a website. The badge says "I am api.ex
|
||||
|
||||
## Why Do Certificates Expire?
|
||||
|
||||
Every certificate has an expiration date, typically 90 days for Let's Encrypt or up to 1 year for commercial CAs. This isn't a bug — it's a security feature. Short lifetimes limit the damage if a private key is compromised, and they force organizations to prove they still control their domains.
|
||||
Every certificate has an expiration date. This isn't a bug — it's a security feature. Short lifetimes limit the damage if a private key is compromised, and they force organizations to prove they still control their domains.
|
||||
|
||||
The problem? When you have 5 certificates, tracking expiry dates is trivial. When you have 500 certificates spread across NGINX servers, F5 load balancers, and IIS boxes in three environments, it becomes a ticking time bomb. One missed renewal means a production outage — your site goes down, your API returns errors, and your customers see scary browser warnings.
|
||||
Certificate lifespans have been shrinking steadily. A decade ago, certificates lasted up to 5 years. Then the CA/Browser Forum — the industry body that sets certificate rules — reduced the maximum to 3 years, then 2 years, then 398 days. In April 2025, they passed Ballot SC-081v3 with zero opposition (25 CAs in favor, 5 abstentions, all 4 browser vendors in favor), setting a phased reduction to **200 days** (March 2026), **100 days** (March 2027), and **47 days** (March 2029). Let's Encrypt already issues 90-day certificates by default.
|
||||
|
||||
**This is the core problem certctl solves**: automated tracking, renewal, and deployment of certificates across your entire infrastructure.
|
||||
The trend is clear: shorter lifespans, more frequent renewals, and zero tolerance for manual processes.
|
||||
|
||||
When you have 5 certificates, tracking expiry dates is trivial. When you have 500 certificates spread across NGINX servers, Apache instances, HAProxy load balancers, F5 appliances, and IIS boxes in three environments — and each certificate needs renewal every 47 days — manual management becomes impossible. One missed renewal means a production outage: your site goes down, your API returns errors, and your customers see browser warnings.
|
||||
|
||||
**This is the core problem certctl solves**: end-to-end automation of the certificate lifecycle — issuance, renewal, and deployment — across your entire infrastructure, with no human intervention required.
|
||||
|
||||
## The Cast of Characters
|
||||
|
||||
@@ -26,11 +65,21 @@ A CA is the trusted third party that signs your certificates. When a CA signs a
|
||||
|
||||
Common CAs include Let's Encrypt (free, automated), DigiCert, Sectigo, and your organization's internal/private CA. Each issues certificates through different protocols and APIs.
|
||||
|
||||
certctl includes a built-in **Local CA** that can operate in two modes: self-signed (default, for development and demos) or as a **subordinate CA** under an enterprise root like Active Directory Certificate Services (ADCS). In sub-CA mode, you load a CA certificate and key signed by your enterprise root, and all certificates certctl issues automatically chain to the enterprise trust hierarchy — no manual trust configuration needed on clients that already trust your enterprise root. certctl also integrates with **step-ca** (Smallstep's private CA) via its native /sign API, providing a lightweight alternative to ACME for internal PKI.
|
||||
|
||||
### ACME Protocol
|
||||
|
||||
ACME (Automatic Certificate Management Environment) is the protocol Let's Encrypt created for automated certificate issuance. Instead of filling out forms and waiting for emails, ACME lets software request, validate, and receive certificates programmatically. The server proves domain ownership by responding to challenges — placing a specific file on the web server (HTTP-01) or creating a DNS record (DNS-01).
|
||||
ACME (Automatic Certificate Management Environment) is the protocol Let's Encrypt created for automated certificate issuance. Instead of filling out forms and waiting for emails, ACME lets software request, validate, and receive certificates programmatically. The server proves domain ownership by responding to challenges — placing a specific file on the web server (HTTP-01), creating a DNS record (DNS-01), or maintaining a standing DNS record that persists across renewals (DNS-PERSIST-01).
|
||||
|
||||
certctl speaks ACME natively via HTTP-01 challenges, so it can request certificates from Let's Encrypt or any ACME-compatible CA without manual intervention. DNS-01 challenge support (required for wildcard certificates) is planned for V2.
|
||||
certctl speaks ACME natively with HTTP-01, DNS-01, and DNS-PERSIST-01 challenges, so it can request certificates — including wildcard certificates — from Let's Encrypt or any ACME-compatible CA without manual intervention. HTTP-01 uses a built-in temporary HTTP server for domain validation; DNS-01 uses pluggable script-based hooks to create TXT records with any DNS provider (Cloudflare, Route53, Azure DNS, etc.); DNS-PERSIST-01 creates a standing `_validation-persist` TXT record once (containing the CA domain and account URI) that the CA revalidates on every renewal — no per-renewal DNS updates needed. If the CA doesn't yet support DNS-PERSIST-01, certctl automatically falls back to DNS-01.
|
||||
|
||||
### EST Protocol (Enrollment over Secure Transport)
|
||||
|
||||
EST (RFC 7030) is a standard protocol for devices to request certificates from a CA. While ACME was designed for web servers proving domain ownership, EST was designed for devices that need certificates without domain validation — think WiFi access points, corporate laptops connecting to 802.1X networks, IoT devices, and mobile devices managed by MDM platforms.
|
||||
|
||||
The workflow is straightforward: a device generates a key pair and a Certificate Signing Request (CSR), sends the CSR to the EST server, and gets back a signed certificate. The EST server also distributes its CA certificate chain so devices can build a complete trust path.
|
||||
|
||||
certctl includes a built-in EST server at `/.well-known/est/` with four operations: distributing the CA certificate chain (`/cacerts`), enrolling new devices (`/simpleenroll`), renewing existing certificates (`/simplereenroll`), and advertising CSR requirements (`/csrattrs`). EST enrollment uses the same issuer connectors as the REST API — so a certificate issued via EST and a certificate issued via the dashboard go through the same CA, appear in the same inventory, and follow the same policies.
|
||||
|
||||
### Private Key
|
||||
|
||||
@@ -58,7 +107,7 @@ The control plane never touches private keys. It coordinates the certificate lif
|
||||
|
||||
### Agents
|
||||
|
||||
Agents are lightweight processes that run on or near your infrastructure. They do the actual work: generating private keys, creating Certificate Signing Requests (CSRs), receiving signed certificates, and deploying them to servers. An agent might run on the same machine as your NGINX server, or on a management host that has SSH access to your web servers.
|
||||
Agents are lightweight processes that run on or near your infrastructure. They do the actual work: generating private keys, creating Certificate Signing Requests (CSRs), receiving signed certificates, and deploying them to target systems. An agent typically runs on the same machine as the target (e.g., your NGINX or IIS server), deploying certificates locally. For network appliances where you can't install an agent, a proxy agent in the same network zone handles deployment via the appliance's API.
|
||||
|
||||
The flow looks like this:
|
||||
|
||||
@@ -72,9 +121,13 @@ The flow looks like this:
|
||||
|
||||
At no point does the private key leave the agent. This is a fundamental security property.
|
||||
|
||||
Agents also report **metadata** about themselves — their operating system, CPU architecture, IP address, hostname, and version — with every heartbeat. This gives ops teams fleet-wide visibility (e.g., "how many agents are running on ARM?", "which agents are still on v1.0.0?") and powers **agent groups** — dynamic device grouping where policies can be scoped to specific agent criteria like OS type, architecture, or network subnet.
|
||||
|
||||
### Deployment Targets
|
||||
|
||||
Targets are the systems where certificates actually get installed — NGINX web servers, F5 BIG-IP load balancers, Microsoft IIS servers. Each target type has a **connector** that knows how to deploy certificates to that specific system (e.g., writing files and reloading NGINX config, calling the F5 REST API, running PowerShell commands on IIS via WinRM).
|
||||
Targets are the systems where certificates actually get installed — NGINX web servers, Apache httpd servers, HAProxy load balancers, F5 BIG-IP appliances, and Microsoft IIS servers. Each target type has a **connector** that knows how to deploy certificates to that specific system (e.g., writing files and reloading NGINX or Apache config, building a combined PEM for HAProxy).
|
||||
|
||||
For targets where an agent runs directly on the machine (NGINX, Apache, HAProxy, IIS), the agent deploys certificates locally — no remote access needed. For network appliances where you can't install an agent (F5 BIG-IP, Palo Alto, etc.), a **proxy agent** in the same network zone picks up the deployment job and calls the appliance's API. The server never initiates outbound connections to any target.
|
||||
|
||||
## The Certificate Lifecycle
|
||||
|
||||
@@ -112,7 +165,58 @@ certctl is for organizations that need visibility, automation, and accountabilit
|
||||
|
||||
### Teams and Owners
|
||||
|
||||
Every certificate belongs to a **team** and has an **owner**. This answers the question "whose problem is it when this cert expires?" In a large organization, the platform team might own infrastructure certs while the payments team owns payment gateway certs.
|
||||
Every certificate belongs to a **team** and has an **owner**. This answers the question "whose problem is it when this cert expires?" In a large organization, the platform team might own infrastructure certs while the payments team owns payment gateway certs. Notifications are routed to the owner's email address automatically.
|
||||
|
||||
### Agent Groups
|
||||
|
||||
Agent groups let you organize agents by criteria — OS, architecture, IP subnet, or version — for dynamic policy scoping. For example, you can create a group matching all Linux agents and scope a renewal policy to that group. Groups can use dynamic matching criteria (agents automatically join when they match) or manual membership (explicitly include/exclude specific agents). Agent groups are managed via the GUI and API.
|
||||
|
||||
### Certificate Profiles
|
||||
|
||||
Certificate profiles define the cryptographic and lifecycle constraints for a class of certificates. A profile specifies which key types are allowed (e.g., RSA-2048, ECDSA P-256), the maximum validity period, and other enrollment rules. When a certificate is assigned to a profile, certctl enforces these constraints during issuance — if an agent submits a CSR with a disallowed key type, issuance is rejected.
|
||||
|
||||
Profiles answer the question "what kind of certificate is this?" while policies answer "is this certificate compliant?" A production TLS profile might allow only ECDSA P-256 with a 90-day max TTL, while a development profile might allow RSA-2048 with a 365-day TTL. Short-lived profiles (TTL under 1 hour) enable machine-to-machine authentication patterns where certificates are issued frequently and expire quickly — these are exempt from CRL/OCSP since expiry itself is sufficient revocation.
|
||||
|
||||
Profiles are managed via the API (`/api/v1/profiles`) and the GUI, and can be assigned to certificates during creation or updated later.
|
||||
|
||||
### Interactive Renewal Approval
|
||||
|
||||
For policies with `auto_renew` disabled, renewal jobs enter an **AwaitingApproval** state instead of processing immediately. An operator must explicitly approve or reject the renewal via the API or GUI. Approved jobs transition to Pending and are picked up by the scheduler. Rejected jobs are cancelled with an optional reason. This is useful for high-value certificates where you want human oversight before renewal.
|
||||
|
||||
### Renewal Timing: Thresholds vs. ARI (RFC 9773)
|
||||
|
||||
**Traditional approach (thresholds):** By default, certctl uses static renewal thresholds — renew a certificate at a fixed number of days before expiry (default: 30 days). This simple, predictable model works for most use cases: it avoids unnecessary renewals near expiry and gives you a predictable window to catch failures.
|
||||
|
||||
**Advanced approach (ACME ARI):** Some Certificate Authorities support ACME Renewal Information (RFC 9773), which allows the CA to tell certctl the optimal time to renew. Instead of guessing "renew 30 days before expiry," the CA responds with a precise `suggestedWindow` containing start and end times. This is useful when:
|
||||
- The CA is performing maintenance and wants to batch renewals in a specific window
|
||||
- The CA is coordinating a mass revocation (e.g., due to a compromise) and needs to control renewal timing
|
||||
- You want to avoid thundering herd renewal spikes by accepting the CA's suggested timing
|
||||
|
||||
**How it works:** Enable with `CERTCTL_ACME_ARI_ENABLED=true` on your ACME issuer. When a certificate approaches expiry, certctl queries the ARI endpoint with the certificate's DER encoding. The CA responds with a suggested renewal window. If the window has already opened (the current time is at or past its start time), certctl renews immediately. Otherwise, it waits until the window opens.
|
||||
|
||||
**Graceful degradation:** If your CA doesn't support ARI (returns 404 from the ARI endpoint), certctl automatically falls back to the traditional threshold-based renewal. No configuration change needed — the fallback is transparent. Errors from the CA are logged as warnings and don't block the renewal process.
|
||||
|
||||
### Certificate Revocation
|
||||
|
||||
When a private key is compromised, a certificate is superseded, or a service is decommissioned, you need to revoke the certificate immediately — not wait for it to expire. Revocation tells clients "stop trusting this certificate right now."
|
||||
|
||||
certctl implements revocation using three complementary mechanisms:
|
||||
|
||||
**Revocation API**: `POST /api/v1/certificates/{id}/revoke` marks a certificate as revoked in the inventory, records the revocation in a dedicated `certificate_revocations` table, notifies the issuing CA (best-effort — the revocation succeeds even if the CA is unreachable), creates an audit trail entry, and sends notifications. You can specify an RFC 5280 reason code (keyCompromise, superseded, cessationOfOperation, etc.) or let it default to "unspecified."
|
||||
|
||||
**Certificate Revocation List (CRL)**: certctl serves both a JSON-formatted CRL at `GET /api/v1/crl` and DER-encoded X.509 CRLs per issuer at `GET /api/v1/crl/{issuer_id}`. The DER CRL is signed by the issuing CA's key and has 24-hour validity — clients can download it periodically to check revocation status offline.
|
||||
|
||||
**OCSP Responder**: For real-time revocation checking, certctl includes an embedded OCSP responder at `GET /api/v1/ocsp/{issuer_id}/{serial}`. It returns signed OCSP responses (good, revoked, or unknown) so clients can verify certificate status without downloading the full CRL.
|
||||
|
||||
Short-lived certificates (those assigned to profiles with TTL under 1 hour) are exempt from CRL and OCSP — their rapid expiry is considered sufficient revocation. This is a deliberate design choice to reduce infrastructure overhead for ephemeral machine-to-machine credentials.
|
||||
|
||||
### Short-Lived Certificates
|
||||
|
||||
Short-lived certificates are certificates with a TTL under 1 hour, typically used for service-to-service authentication in microservice architectures. Instead of revoking these certificates when something goes wrong, you simply stop issuing new ones — the existing certificates expire within minutes.
|
||||
|
||||
certctl provides a dedicated dashboard view for short-lived credentials that shows active certificates with live TTL countdowns, auto-refreshes every 10 seconds, and filters by profile. This gives ops teams real-time visibility into ephemeral credential activity without cluttering the main certificate inventory.
|
||||
|
||||
Short-lived certificates are defined by their profile — assign a certificate to a profile with `max_validity_days` that translates to under 1 hour, and certctl automatically treats it as short-lived: no CRL/OCSP entries, no revocation overhead, just rapid issuance and natural expiry.
|
||||
|
||||
### Policies
|
||||
|
||||
@@ -120,7 +224,7 @@ Policies are guardrails. You can enforce rules like "production certificates mus
|
||||
|
||||
### Jobs
|
||||
|
||||
Every action in certctl — issuing a certificate, renewing one, deploying to a target — is tracked as a **job**. Jobs have states (Pending, Running, Completed, Failed, Cancelled), retry logic, and a full audit trail. If a deployment fails, you can see exactly what happened and when.
|
||||
Every action in certctl — issuing a certificate, renewing one, deploying to a target — is tracked as a **job**. Jobs have states (Pending, AwaitingCSR, AwaitingApproval, Running, Completed, Failed, Cancelled), retry logic, and a full audit trail. AwaitingCSR means the job is waiting for an agent to generate a key and submit a CSR. AwaitingApproval means the job requires human approval before proceeding (used with non-auto-renew policies). If a deployment fails, you can see exactly what happened and when.
|
||||
|
||||
### Audit Trail
|
||||
|
||||
@@ -128,10 +232,49 @@ Every action is logged: who did it, what changed, when, and why. This is essenti
|
||||
|
||||
### Notifications
|
||||
|
||||
certctl can alert you when certificates are expiring, when renewals fail, when deployments succeed, or when policy violations are detected. Notifications go out via email or webhooks, with Slack support planned.
|
||||
certctl can alert you when certificates are expiring, when renewals fail, when deployments succeed, or when policy violations are detected. Notifications are delivered via six channels: Email, Webhook, Slack, Microsoft Teams, PagerDuty, and OpsGenie. Each notifier is configured independently via environment variables and can be enabled or disabled as needed.
|
||||
|
||||
### CLI
|
||||
|
||||
certctl ships with a command-line tool (`certctl-cli`) for operators who prefer terminal workflows or need to integrate certctl into shell scripts and CI/CD pipelines. The CLI wraps the REST API with 12 subcommands organized by resource: `certs list`, `certs get`, `certs renew`, `certs revoke`, `agents list`, `agents get`, `jobs list`, `jobs get`, `jobs cancel`, `import` (bulk PEM import), `status` (health + summary stats), and `version`.
|
||||
|
||||
The CLI supports both table and JSON output formats (`--format table` or `--format json`). It connects to the server via `CERTCTL_SERVER_URL` and authenticates with `CERTCTL_API_KEY`. It's built with Go's standard library only — no external dependencies.
|
||||
|
||||
### MCP Server (AI Integration)
|
||||
|
||||
certctl includes an MCP (Model Context Protocol) server that exposes 78 MCP tools covering the REST API. This enables AI assistants like Claude, Cursor, and other MCP-compatible tools to interact with your certificate infrastructure using natural language — "show me all expiring certificates," "revoke the VPN cert," or "what agents are offline?"
|
||||
|
||||
The MCP server is a separate binary (`cmd/mcp-server/`) that communicates via stdio transport and acts as a stateless HTTP proxy to the certctl REST API. It requires no additional infrastructure — just point it at your certctl server URL and API key.
|
||||
|
||||
### EST Enrollment (Device Certificates)
|
||||
|
||||
certctl's EST server enables device certificate enrollment for use cases that don't fit the traditional "ops team requests a cert via API" model. When a RADIUS server is configured to use certctl for 802.1X WiFi authentication, or an MDM platform enrolls corporate devices, they use the EST protocol at `/.well-known/est/`. The EST server validates the CSR, issues a certificate via the configured issuer connector, and returns it in PKCS#7 format — the standard wire format that every EST client understands. Each enrollment is recorded in the audit trail with the protocol, common name, SANs, issuer, and serial number.
|
||||
|
||||
Enable it with `CERTCTL_EST_ENABLED=true`. Optionally bind enrollments to a specific issuer (`CERTCTL_EST_ISSUER_ID`) or certificate profile (`CERTCTL_EST_PROFILE_ID`) to constrain what EST clients can request.
|
||||
|
||||
### Certificate Discovery
|
||||
|
||||
Certificate discovery is the process of automatically finding existing certificates in your infrastructure — certificates you didn't issue through certctl, possibly issued by other CAs or tools. This is essential for building a complete inventory before you can manage everything.
|
||||
|
||||
**How it works:** There are two discovery modes. *Filesystem discovery* — agents scan the directories listed in `CERTCTL_DISCOVERY_DIRS` for certificate files. On startup and every 6 hours, the agent walks directories recursively, parses PEM and DER files, extracts metadata, and reports findings to the control plane. *Network discovery* — the control plane itself probes TLS endpoints across configured CIDR ranges and ports (enabled via `CERTCTL_NETWORK_SCAN_ENABLED=true`). It connects to each endpoint, extracts certificates from the TLS handshake, and feeds results into the same discovery pipeline. This finds certificates on services you may not have agents on. In both cases, the server deduplicates by fingerprint and stores discovered certs with a status: **Unmanaged** (discovered but not yet managed), **Managed** (linked to a control plane cert), or **Dismissed** (operator decided not to manage it).
|
||||
|
||||
This gives you a three-step triage workflow:
|
||||
1. **Discover** — Agents scan filesystems and the server probes network endpoints to find all existing certs
|
||||
2. **Triage** — Operators review discoveries in the **Discovery** dashboard page and decide: claim it (link to a managed certificate) or dismiss it (not worth managing). The dashboard shows a summary stats bar (Unmanaged/Managed/Dismissed counts), filters by status and agent, and provides one-click claim and dismiss actions.
|
||||
3. **Baseline** — Once triaged, you have a complete baseline of what's deployed, what you're managing, and what's unmanaged
|
||||
|
||||
Network scan targets are managed from the **Network Scans** dashboard page — create CIDR ranges and ports to probe, enable/disable targets, trigger on-demand scans, and view results. Discovered certificates from network scans appear in the same Discovery triage page alongside filesystem discoveries.
|
||||
|
||||
This is a prerequisite for multi-CA migration, compliance audits, and building confidence that you've found all the certificates that matter.
|
||||
|
||||
### Observability
|
||||
|
||||
certctl exposes metrics in two formats: a JSON endpoint at `GET /api/v1/metrics` and a Prometheus exposition format at `GET /api/v1/metrics/prometheus` (compatible with Prometheus, Grafana Agent, Datadog Agent, and VictoriaMetrics). Both provide gauges (certificate totals by status, agent counts, pending jobs), counters (completed/failed jobs), and uptime. Five stats endpoints power the dashboard charts: summary statistics, certificates by status, expiration timeline, job trends, and issuance rate.
|
||||
|
||||
The agent fleet overview page groups agents by OS, architecture, and version, showing distribution charts that help ops teams track fleet health and identify outdated agents. All API requests are logged via structured `slog` middleware with request IDs for correlation.
|
||||
|
||||
## What's Next
|
||||
|
||||
Now that you understand the concepts, head to the [Quick Start Guide](quickstart.md) to get certctl running locally in under 5 minutes. You'll see a pre-loaded dashboard with demo certificates, explore the API, and understand how everything fits together.
|
||||
|
||||
For a deeper look at the system design, see the [Architecture Guide](architecture.md).
|
||||
For a deeper look at the system design, see the [Architecture Guide](architecture.md). For terminal-based workflows, check out the CLI Guide (docs coming soon). For AI-native integration, see the [MCP Server Guide](mcp.md). For the full API reference, see the [OpenAPI Spec Guide](openapi.md).
|
||||
|
||||
@@ -2,15 +2,60 @@
|
||||
|
||||
Connectors extend certctl to integrate with external systems for certificate issuance, deployment, and notifications. This guide covers the connector interfaces, built-in implementations, and how to build your own.
|
||||
|
||||
## Contents
|
||||
|
||||
1. [Overview](#overview)
|
||||
2. [Issuer Connector](#issuer-connector)
|
||||
- [Interface](#interface)
|
||||
- [Built-in: Local CA](#built-in-local-ca)
|
||||
- [Built-in: ACME v2 (Let's Encrypt, Sectigo, ZeroSSL)](#built-in-acme-v2-lets-encrypt-sectigo-zerossl)
|
||||
- [Built-in: step-ca (Smallstep Private CA)](#built-in-step-ca-smallstep-private-ca)
|
||||
- [OpenSSL / Custom CA](#openssl--custom-ca)
|
||||
- [Revocation Across Issuers](#revocation-across-issuers)
|
||||
- [EST Integration (GetCACertPEM)](#est-integration-getcacertpem)
|
||||
- [Planned Issuers](#planned-issuers)
|
||||
- [Building a Custom Issuer](#building-a-custom-issuer)
|
||||
3. [Target Connector](#target-connector)
|
||||
- [Interface](#interface-1)
|
||||
- [Built-in: NGINX](#built-in-nginx)
|
||||
- [Built-in: Apache httpd](#built-in-apache-httpd)
|
||||
- [Built-in: HAProxy](#built-in-haproxy)
|
||||
- [Built-in: Traefik](#built-in-traefik)
|
||||
- [Built-in: Caddy](#built-in-caddy)
|
||||
- [F5 BIG-IP (Interface Only)](#f5-big-ip-interface-only)
|
||||
- [IIS (Interface Only, Dual-Mode)](#iis-interface-only-dual-mode)
|
||||
4. [Notifier Connector](#notifier-connector)
|
||||
- [Interface](#interface-2)
|
||||
5. [Registering a Connector](#registering-a-connector)
|
||||
- [IssuerConnectorAdapter](#issuerconnectoradapter)
|
||||
- [Notifier Registration](#notifier-registration)
|
||||
6. [Testing Connectors](#testing-connectors)
|
||||
- [Unit Tests](#unit-tests)
|
||||
- [Integration Tests](#integration-tests)
|
||||
7. [Best Practices](#best-practices)
|
||||
8. [Agent Discovery Scanner](#agent-discovery-scanner)
|
||||
- [Configuration](#configuration)
|
||||
- [How It Works](#how-it-works)
|
||||
- [API Endpoints](#api-endpoints)
|
||||
- [Use Cases](#use-cases)
|
||||
9. [Network Certificate Scanner (M21)](#network-certificate-scanner-m21)
|
||||
- [Configuration](#configuration-1)
|
||||
- [Creating Scan Targets](#creating-scan-targets)
|
||||
- [How It Works](#how-it-works-1)
|
||||
- [API Endpoints](#api-endpoints-1)
|
||||
- [Scheduler Integration](#scheduler-integration)
|
||||
- [Use Cases](#use-cases-1)
|
||||
10. [What's Next](#whats-next)
|
||||
|
||||
## Overview
|
||||
|
||||
Three types of connectors:
|
||||
|
||||
1. **Issuer Connector** — Obtains certificates from CAs (Local CA, ACME implemented; step-ca, ADCS, OpenSSL planned V2; DigiCert, Entrust, GlobalSign, EJBCA, Vault PKI, Google CAS planned V3)
|
||||
2. **Target Connector** — Deploys certificates to infrastructure (NGINX implemented; F5, IIS interface only; Apache httpd, HAProxy planned V2; AWS ALB, Azure Key Vault, Palo Alto, FortiGate, Citrix ADC, Kubernetes Secrets planned V3)
|
||||
3. **Notifier Connector** — Sends alerts about certificate events (Email, Webhooks; Slack, Teams, PagerDuty, OpsGenie planned V2.1)
|
||||
1. **Issuer Connector** — Obtains certificates from CAs (Local CA with sub-CA support, ACME with HTTP-01 + DNS-01 + DNS-PERSIST-01, step-ca, OpenSSL/Custom CA implemented; additional CA integrations planned)
|
||||
2. **Target Connector** — Deploys certificates to infrastructure (NGINX, Apache httpd, HAProxy, Traefik, Caddy implemented; F5 via proxy agent, IIS dual-mode interface only; additional cloud and network targets planned)
|
||||
3. **Notifier Connector** — Sends alerts about certificate events (Email, Webhooks, Slack, Microsoft Teams, PagerDuty, OpsGenie implemented)
|
||||
|
||||
All connectors accept JSON configuration at initialization, support config validation, and are registered in the service layer. Issuer connectors run on the control plane; target connectors run on agents.
|
||||
All connectors accept JSON configuration at initialization, support config validation, and are registered in the service layer. Issuer connectors run on the control plane; target connectors run on agents. For network appliances where agents can't be installed, a **proxy agent** in the same network zone handles deployment — the server never initiates outbound connections.
|
||||
|
||||
## Issuer Connector
|
||||
|
||||
@@ -37,6 +82,19 @@ type Connector interface {
|
||||
|
||||
// GetOrderStatus checks the status of an async issuance order
|
||||
GetOrderStatus(ctx context.Context, orderID string) (*OrderStatus, error)
|
||||
|
||||
// GenerateCRL generates a DER-encoded X.509 CRL signed by this issuer.
|
||||
// Returns nil if the issuer does not support CRL generation (e.g., ACME).
|
||||
GenerateCRL(ctx context.Context, revokedCerts []RevokedCertEntry) ([]byte, error)
|
||||
|
||||
// SignOCSPResponse signs an OCSP response for the given certificate serial.
|
||||
// Returns nil if the issuer does not support OCSP (e.g., ACME).
|
||||
SignOCSPResponse(ctx context.Context, req OCSPSignRequest) ([]byte, error)
|
||||
|
||||
// GetCACertPEM returns the PEM-encoded CA certificate chain for this issuer.
|
||||
// Used by the EST server's /cacerts endpoint (RFC 7030).
|
||||
// Returns error if the issuer doesn't provide a static CA chain (e.g., ACME, step-ca).
|
||||
GetCACertPEM(ctx context.Context) (string, error)
|
||||
}
|
||||
|
||||
type IssuanceRequest struct {
|
||||
@@ -58,38 +116,46 @@ type RenewalRequest struct {
|
||||
CommonName string
|
||||
SANs []string
|
||||
CSRPEM string
|
||||
OrderID string // optional, for tracking
|
||||
OrderID *string // optional, for tracking (pointer — nil when not provided)
|
||||
}
|
||||
|
||||
type RevocationRequest struct {
|
||||
Serial string
|
||||
Reason string // optional
|
||||
Reason *string // optional (pointer — nil when not provided)
|
||||
}
|
||||
|
||||
type OrderStatus struct {
|
||||
OrderID string
|
||||
Status string // "pending", "valid", "invalid", "expired"
|
||||
Message string
|
||||
CertPEM string
|
||||
ChainPEM string
|
||||
Serial string
|
||||
NotBefore time.Time
|
||||
NotAfter time.Time
|
||||
Status string // "pending", "valid", "invalid", "expired"
|
||||
Message *string // optional (pointer fields are omitted from JSON when nil)
|
||||
CertPEM *string // populated when order is complete
|
||||
ChainPEM *string // populated when order is complete
|
||||
Serial *string // populated when order is complete
|
||||
NotBefore *time.Time // populated when order is complete
|
||||
NotAfter *time.Time // populated when order is complete
|
||||
UpdatedAt time.Time
|
||||
}
|
||||
```
|
||||
|
||||
### Built-in: Local CA
|
||||
|
||||
The Local CA issuer generates self-signed certificates using Go's `crypto/x509` library. It creates a CA on first use (in memory), issues certificates with proper serial numbers, validity periods, SANs, and key usage extensions.
|
||||
The Local CA issuer signs certificates using Go's `crypto/x509` library. It supports two modes:
|
||||
|
||||
This issuer is designed for development and demos only — certificates are self-signed and not trusted by browsers.
|
||||
**Self-signed mode (default):** Creates a CA on first use (in memory), issues certificates with proper serial numbers, validity periods, SANs, and key usage extensions. Designed for development and demos — certificates are self-signed and not trusted by browsers.
|
||||
|
||||
**Sub-CA mode:** Loads a CA certificate and private key from disk (`CERTCTL_CA_CERT_PATH` + `CERTCTL_CA_KEY_PATH`). The CA cert is signed by an upstream CA (e.g., ADCS), so all issued certificates chain to the enterprise root trust hierarchy. Clients that already trust the enterprise root automatically trust certctl-issued certs. Supports RSA, ECDSA, and PKCS#8 key formats. If the paths are not set, falls back to self-signed mode. The loaded certificate must have `IsCA=true` and `KeyUsageCertSign`.
|
||||
|
||||
**CRL and OCSP support (M15b):** The Local CA supports DER-encoded X.509 CRL generation via `GET /api/v1/crl/{issuer_id}` with 24-hour validity. An embedded OCSP responder at `GET /api/v1/ocsp/{issuer_id}/{serial}` returns signed OCSP responses for issued certificates (good/revoked/unknown status). Certificates with profile TTL < 1 hour automatically skip CRL/OCSP — expiry is treated as sufficient revocation for short-lived credentials.
|
||||
|
||||
**Extended Key Usage (EKU) support (M27):** The Local CA respects EKU constraints from certificate profiles and adjusts key usage flags accordingly. For S/MIME certificates (emailProtection EKU), it uses `DigitalSignature | ContentCommitment` instead of the TLS default. For TLS certificates (serverAuth/clientAuth EKU), it uses `DigitalSignature | KeyEncipherment`. This enables support for multiple certificate types — TLS, S/MIME, code signing, timestamping — from a single CA.
|
||||
|
||||
Configuration:
|
||||
```json
|
||||
{
|
||||
"ca_common_name": "CertCtl Local CA",
|
||||
"validity_days": 90
|
||||
"validity_days": 90,
|
||||
"ca_cert_path": "/etc/certctl/ca/ca.pem",
|
||||
"ca_key_path": "/etc/certctl/ca/ca-key.pem"
|
||||
}
|
||||
```
|
||||
|
||||
@@ -97,9 +163,17 @@ Location: `internal/connector/issuer/local/local.go`
|
||||
|
||||
### Built-in: ACME v2 (Let's Encrypt, Sectigo, ZeroSSL)
|
||||
|
||||
The ACME connector implements the full ACME v2 protocol using Go's `golang.org/x/crypto/acme` package. It supports HTTP-01 challenge solving via a built-in temporary HTTP server that starts on demand during certificate issuance.
|
||||
The ACME connector implements the full ACME v2 protocol using Go's `golang.org/x/crypto/acme` package. It supports three challenge methods:
|
||||
|
||||
Configuration:
|
||||
**HTTP-01 (default):** A built-in temporary HTTP server starts on demand during certificate issuance. The domain being validated must resolve to the machine running the connector, and the configured HTTP port must be reachable from the internet.
|
||||
|
||||
**DNS-01 (for wildcards):** Creates DNS TXT records via user-provided scripts. Required for wildcard certificates (`*.example.com`) and hosts that can't serve HTTP on port 80. The connector invokes external scripts to create and clean up `_acme-challenge` TXT records, making it compatible with any DNS provider (Cloudflare, Route53, Azure DNS, etc.).
|
||||
|
||||
**DNS-PERSIST-01 (standing record):** Creates a one-time persistent TXT record at `_validation-persist.<domain>` containing the CA's issuer domain and your ACME account URI. Once set, this record authorizes unlimited future certificate issuances without per-renewal DNS updates. Based on [draft-ietf-acme-dns-persist](https://datatracker.ietf.org/doc/draft-ietf-acme-dns-persist/) and CA/Browser Forum ballot SC-088v3. If the CA doesn't offer dns-persist-01 yet, the connector falls back to dns-01 automatically.
|
||||
|
||||
**ACME Renewal Information (ARI, RFC 9773):** Instead of using fixed renewal thresholds (e.g., renew 30 days before expiry), certctl can ask the CA when it should renew. Enable with `CERTCTL_ACME_ARI_ENABLED=true`. The ARI protocol lets the CA specify a `suggestedWindow` (start and end times) for when you should renew — useful for distributing load during maintenance windows or coordinating mass revocation scenarios. Cert ID is computed as `base64url(SHA-256(DER cert))`. If the CA doesn't support ARI (404 response), certctl automatically falls back to threshold-based renewal with no operator intervention required.
|
||||
|
||||
HTTP-01 configuration:
|
||||
```json
|
||||
{
|
||||
"directory_url": "https://acme-staging-v02.api.letsencrypt.org/directory",
|
||||
@@ -108,27 +182,144 @@ Configuration:
|
||||
}
|
||||
```
|
||||
|
||||
For HTTP-01 to work, the domain being validated must resolve to the machine running the connector, and the configured HTTP port must be reachable from the internet. The connector automatically registers an ACME account, creates orders, solves challenges, finalizes with the CSR, and downloads the issued certificate chain.
|
||||
DNS-01 configuration:
|
||||
```json
|
||||
{
|
||||
"directory_url": "https://acme-v02.api.letsencrypt.org/directory",
|
||||
"email": "admin@example.com",
|
||||
"challenge_type": "dns-01",
|
||||
"dns_present_script": "/etc/certctl/dns/create-record.sh",
|
||||
"dns_cleanup_script": "/etc/certctl/dns/delete-record.sh",
|
||||
"dns_propagation_wait": 30
|
||||
}
|
||||
```
|
||||
|
||||
**Limitation:** v1 supports HTTP-01 challenges only. DNS-01 challenge support (required for wildcard certificates and hosts that can't serve HTTP on port 80) is planned for V2, including provider-specific DNS adapters (Cloudflare, Route53, etc.) and custom validation script hooks.
|
||||
DNS-PERSIST-01 configuration:
|
||||
```json
|
||||
{
|
||||
"directory_url": "https://acme-v02.api.letsencrypt.org/directory",
|
||||
"email": "admin@example.com",
|
||||
"challenge_type": "dns-persist-01",
|
||||
"dns_present_script": "/etc/certctl/dns/create-record.sh",
|
||||
"dns_persist_issuer_domain": "letsencrypt.org",
|
||||
"dns_propagation_wait": 30
|
||||
}
|
||||
```
|
||||
|
||||
The present script creates a TXT record at `_validation-persist.<domain>` with the value `letsencrypt.org; accounturi=https://acme-v02.api.letsencrypt.org/acme/acct/<your-id>`. This record is permanent — no cleanup script is needed.
|
||||
|
||||
ZeroSSL configuration (requires External Account Binding):
|
||||
```json
|
||||
{
|
||||
"directory_url": "https://acme.zerossl.com/v2/DV90",
|
||||
"email": "admin@example.com",
|
||||
"eab_kid": "your-zerossl-eab-kid",
|
||||
"eab_hmac": "your-zerossl-eab-hmac-base64url"
|
||||
}
|
||||
```
|
||||
|
||||
ZeroSSL, Google Trust Services, and SSL.com require External Account Binding (EAB) for ACME account registration. For most CAs, get your EAB credentials from the CA's dashboard and provide them via `eab_kid` and `eab_hmac`. The HMAC key must be base64url-encoded (no padding). CAs that don't require EAB (Let's Encrypt, Buypass) ignore these fields.
|
||||
|
||||
**ZeroSSL auto-EAB:** When the directory URL points to ZeroSSL and no EAB credentials are provided, certctl automatically fetches them from ZeroSSL's public API (`api.zerossl.com/acme/eab-credentials-email`) using your configured email address. No dashboard visit required — just set the directory URL and email, and it works. This is the same approach used by Caddy and acme.sh.
|
||||
|
||||
Minimal ZeroSSL configuration (auto-EAB):
|
||||
```json
|
||||
{
|
||||
"directory_url": "https://acme.zerossl.com/v2/DV90",
|
||||
"email": "admin@example.com"
|
||||
}
|
||||
```
|
||||
|
||||
DNS hook scripts receive these environment variables: `CERTCTL_DNS_DOMAIN` (domain being validated), `CERTCTL_DNS_FQDN` (full record name — `_acme-challenge.<domain>` for dns-01, `_validation-persist.<domain>` for dns-persist-01), `CERTCTL_DNS_VALUE` (TXT record value), `CERTCTL_DNS_TOKEN` (ACME challenge token). The present script must create the TXT record and exit 0; the cleanup script removes it (dns-01 only).
|
||||
|
||||
Environment variables for the default ACME connector:
|
||||
- `CERTCTL_ACME_DIRECTORY_URL` — ACME directory URL
|
||||
- `CERTCTL_ACME_EMAIL` — Contact email for account registration
|
||||
- `CERTCTL_ACME_EAB_KID` — External Account Binding Key ID (required by ZeroSSL, Google Trust Services, SSL.com)
|
||||
- `CERTCTL_ACME_EAB_HMAC` — External Account Binding HMAC key (base64url-encoded)
|
||||
- `CERTCTL_ACME_CHALLENGE_TYPE` — `http-01` (default), `dns-01`, or `dns-persist-01`
|
||||
- `CERTCTL_ACME_DNS_PRESENT_SCRIPT` — Path to DNS record creation script (dns-01 and dns-persist-01)
|
||||
- `CERTCTL_ACME_DNS_CLEANUP_SCRIPT` — Path to DNS record cleanup script (dns-01 only, not used by dns-persist-01)
|
||||
- `CERTCTL_ACME_DNS_PERSIST_ISSUER_DOMAIN` — CA issuer domain for persistent record (dns-persist-01 only, e.g., `letsencrypt.org`)
|
||||
|
||||
The connector is registered in the issuer registry under `iss-acme-staging` and `iss-acme-prod`. Use `iss-acme-staging` for Let's Encrypt staging (rate-limit-friendly testing) and `iss-acme-prod` for production certificates.
|
||||
|
||||
Location: `internal/connector/issuer/acme/acme.go`
|
||||
**Note:** ACME-issued certificates rely on the Local CA for CRL/OCSP endpoints if they are stored in certctl's inventory. For issuers with their own public CRL/OCSP infrastructure (e.g., Let's Encrypt), clients should validate against the issuer's endpoints instead.
|
||||
|
||||
### Planned Issuers (V2)
|
||||
Location: `internal/connector/issuer/acme/acme.go`, `internal/connector/issuer/acme/dns.go`
|
||||
|
||||
The following issuer connectors are planned for V2:
|
||||
### Built-in: step-ca (Smallstep Private CA)
|
||||
|
||||
- **step-ca** — Smallstep's private CA and ACME server. Would allow certctl to issue certificates from a self-hosted step-ca instance via its ACME or provisioner APIs.
|
||||
- **OpenSSL / Custom CA** — Support for external CAs that use OpenSSL-based signing workflows, including custom script hooks for organizations with existing CA tooling.
|
||||
- **ADCS (Active Directory Certificate Services)** — Microsoft's enterprise CA. Would allow certctl to request certificates from an existing ADCS infrastructure, useful for organizations that need lifecycle management around their Windows PKI.
|
||||
- **Vault PKI** — HashiCorp Vault's PKI secrets engine for organizations using Vault as their internal CA.
|
||||
- **DigiCert** — Commercial CA integration via DigiCert's REST API.
|
||||
The step-ca connector integrates with Smallstep's step-ca private certificate authority using its native `/sign` API with JWK provisioner authentication. This is simpler than ACME for internal PKI — no challenge solving, no domain validation, just CSR + auth token → signed certificate.
|
||||
|
||||
Configuration:
|
||||
```json
|
||||
{
|
||||
"ca_url": "https://ca.internal:9000",
|
||||
"provisioner_name": "certctl",
|
||||
"provisioner_key_path": "/etc/certctl/stepca/provisioner.json",
|
||||
"provisioner_password": "...",
|
||||
"root_cert_path": "/etc/certctl/stepca/root_ca.crt",
|
||||
"validity_days": 90
|
||||
}
|
||||
```
|
||||
|
||||
Environment variables:
|
||||
- `CERTCTL_STEPCA_URL` — step-ca server URL
|
||||
- `CERTCTL_STEPCA_PROVISIONER` — JWK provisioner name
|
||||
- `CERTCTL_STEPCA_KEY_PATH` — Path to provisioner private key (JWK JSON)
|
||||
- `CERTCTL_STEPCA_PASSWORD` — Provisioner key password
|
||||
|
||||
The connector is registered in the issuer registry under `iss-stepca`. step-ca also works with the existing ACME connector (point `iss-acme-*` at step-ca's ACME directory URL for ACME-based issuance).
|
||||
|
||||
**Note:** step-ca-issued certificates rely on step-ca's own CRL/OCSP infrastructure. certctl's local CRL/OCSP endpoints (`GET /api/v1/crl/{issuer_id}` and `GET /api/v1/ocsp/{issuer_id}/{serial}`) are populated from step-ca's revocation data if available, but clients should validate against step-ca's endpoints for the authoritative status.
|
||||
|
||||
Location: `internal/connector/issuer/stepca/stepca.go`
|
||||
|
||||
### OpenSSL / Custom CA
|
||||
|
||||
Script-based issuer connector for organizations with existing CA tooling. Delegates certificate signing, revocation, and CRL generation to user-provided shell scripts.
|
||||
|
||||
**Configuration:**
|
||||
| Variable | Required | Description |
|
||||
|----------|----------|-------------|
|
||||
| `CERTCTL_OPENSSL_SIGN_SCRIPT` | Yes | Script that receives CSR on stdin and outputs signed PEM cert on stdout |
|
||||
| `CERTCTL_OPENSSL_REVOKE_SCRIPT` | No | Script to revoke a certificate (receives serial number as argument) |
|
||||
| `CERTCTL_OPENSSL_CRL_SCRIPT` | No | Script that outputs DER-encoded CRL on stdout |
|
||||
| `CERTCTL_OPENSSL_TIMEOUT_SECONDS` | No | Script execution timeout (default: 30s) |
|
||||
|
||||
The sign script receives the CSR PEM on stdin and should output the signed certificate PEM on stdout. The connector parses the certificate to extract serial number, validity dates, and chain information. Before shell execution, serial numbers are validated as hex-only (`^[0-9a-fA-F]+$`) and revocation reason codes are validated against the RFC 5280 specification to prevent command injection.
|
||||
|
||||
### Revocation Across Issuers
|
||||
|
||||
All issuer connectors implement `RevokeCertificate(ctx, serial, reason)`. When a certificate is revoked via `POST /api/v1/certificates/{id}/revoke`, certctl notifies the issuing CA on a best-effort basis — the revocation succeeds in certctl's inventory even if the CA notification fails (e.g., CA is temporarily unreachable). This ensures revocation is never blocked by external dependencies.
|
||||
|
||||
Each issuer handles revocation differently:
|
||||
|
||||
- **Local CA**: Updates the in-memory revocation list. DER-encoded CRLs and OCSP responses are generated from this list.
|
||||
- **ACME**: ACME v2 has limited revocation support — certctl records the revocation locally and serves it via CRL/OCSP.
|
||||
- **step-ca**: Calls step-ca's `/revoke` API endpoint. Clients should check step-ca's own CRL/OCSP for authoritative status.
|
||||
- **OpenSSL/Custom CA**: Invokes the configured revoke script (`CERTCTL_OPENSSL_REVOKE_SCRIPT`) with the serial number as an argument.
|
||||
|
||||
### EST Integration (GetCACertPEM)
|
||||
|
||||
The `GetCACertPEM()` method returns the PEM-encoded CA certificate chain, used by the EST server's `/.well-known/est/cacerts` endpoint (RFC 7030) to distribute the CA chain to enrolling devices. Each issuer handles this differently:
|
||||
|
||||
- **Local CA**: Returns the CA certificate PEM (self-signed or sub-CA cert). This is the primary EST issuer.
|
||||
- **ACME**: Returns error — ACME CAs provide chains per-issuance, not statically.
|
||||
- **step-ca**: Returns error — step-ca serves its own `/root` endpoint for CA distribution.
|
||||
- **OpenSSL/Custom CA**: Returns error — custom script-based CAs have no CA cert access through certctl.
|
||||
|
||||
Note: EST (Enrollment over Secure Transport) is not a connector — it's a protocol handler (`internal/api/handler/est.go`) that delegates certificate issuance to whichever issuer connector is configured via `CERTCTL_EST_ISSUER_ID`. See the [Architecture Guide](architecture.md#est-server-rfc-7030) for details.
|
||||
|
||||
### Planned Issuers
|
||||
|
||||
The following issuer connectors are planned for future milestones:
|
||||
|
||||
- **Vault PKI** — HashiCorp Vault's PKI secrets engine for organizations using Vault as their internal CA (planned for V4.0+).
|
||||
- **DigiCert** — Commercial CA integration via DigiCert's REST API (planned).
|
||||
|
||||
Note: ADCS (Active Directory Certificate Services) integration is handled via the **sub-CA mode** of the Local CA issuer, not as a separate connector. certctl operates as a subordinate CA with its signing certificate issued by ADCS, so all certctl-issued certs chain to the enterprise ADCS root. See the Local CA section above.
|
||||
|
||||
### Building a Custom Issuer
|
||||
|
||||
@@ -280,9 +471,87 @@ The `reload_command` defaults to `systemctl reload nginx` but can be overridden
|
||||
|
||||
Location: `internal/connector/target/nginx/nginx.go`
|
||||
|
||||
### Planned: F5 BIG-IP (Interface Only)
|
||||
### Built-in: Apache httpd
|
||||
|
||||
The F5 BIG-IP target connector interface is built with the iControl REST flow mapped out, but the actual API calls are not yet implemented. The planned flow is: authenticate via `POST /mgmt/shared/authn/login`, upload cert PEM via `POST /mgmt/tm/ltm/certificate`, update the SSL profile via `PATCH /mgmt/tm/ltm/profile/client-ssl/{profile}`, and validate deployment by checking profile status. Implementation is planned for V2.
|
||||
The Apache httpd connector follows the same pattern as NGINX: it writes separate certificate, chain, and key files to disk, validates the Apache configuration with `apachectl configtest`, and performs a graceful reload. The key difference is that private keys are written with 0600 permissions (owner-only read) for security, while cert and chain files use 0644.
|
||||
|
||||
Configuration:
|
||||
```json
|
||||
{
|
||||
"cert_path": "/etc/apache2/ssl/cert.pem",
|
||||
"chain_path": "/etc/apache2/ssl/chain.pem",
|
||||
"key_path": "/etc/apache2/ssl/key.pem",
|
||||
"reload_command": "apachectl graceful",
|
||||
"validate_command": "apachectl configtest"
|
||||
}
|
||||
```
|
||||
|
||||
The `reload_command` can be customized for different environments (e.g., `systemctl reload apache2` for systemd, `httpd -k graceful` for RHEL/CentOS). Validation output is captured and included in error messages for debugging.
|
||||
|
||||
Location: `internal/connector/target/apache/apache.go`
|
||||
|
||||
### Built-in: HAProxy
|
||||
|
||||
The HAProxy connector differs from NGINX and Apache because HAProxy expects all TLS material in a single combined PEM file (certificate + chain + private key concatenated). The connector builds this combined file, writes it with 0600 permissions (since it contains the private key), optionally validates the HAProxy configuration, and reloads.
|
||||
|
||||
Configuration:
|
||||
```json
|
||||
{
|
||||
"pem_path": "/etc/haproxy/certs/site.pem",
|
||||
"reload_command": "systemctl reload haproxy",
|
||||
"validate_command": "haproxy -c -f /etc/haproxy/haproxy.cfg"
|
||||
}
|
||||
```
|
||||
|
||||
The combined PEM is built in this order: server certificate, intermediate/chain certificates, private key. The `validate_command` is optional — if omitted, the connector skips config validation and goes straight to reload.
|
||||
|
||||
Location: `internal/connector/target/haproxy/haproxy.go`
|
||||
|
||||
### Built-in: Traefik
|
||||
|
||||
The Traefik connector uses Traefik's file provider — it writes certificate and key files to a watched directory, and Traefik automatically picks up the changes without any explicit reload command. This is the simplest deployment model: write the files, and Traefik does the rest.
|
||||
|
||||
Configuration:
|
||||
```json
|
||||
{
|
||||
"cert_dir": "/etc/traefik/certs",
|
||||
"cert_file": "site.crt",
|
||||
"key_file": "site.key"
|
||||
}
|
||||
```
|
||||
|
||||
The `cert_dir` is the directory Traefik is configured to watch via its file provider (e.g., `providers.file.directory` in Traefik's static config). The connector writes `cert_file` and `key_file` into this directory with appropriate permissions. Traefik's file watcher detects the change and reloads the TLS configuration automatically.
|
||||
|
||||
Location: `internal/connector/target/traefik/traefik.go`
|
||||
|
||||
### Built-in: Caddy
|
||||
|
||||
The Caddy connector supports two deployment modes — choose based on your Caddy setup:
|
||||
|
||||
**API mode (recommended):** Posts the certificate directly to Caddy's admin API (`POST /load` or certificate-specific endpoints) for zero-downtime hot reload. Requires Caddy's admin API to be enabled and accessible from the agent.
|
||||
|
||||
**File mode (fallback):** Writes cert and key files to disk, relying on Caddy's built-in file watcher or a manual reload. Use this when the admin API isn't available or when Caddy is configured to read certificates from disk.
|
||||
|
||||
Configuration:
|
||||
```json
|
||||
{
|
||||
"mode": "api",
|
||||
"admin_api": "http://localhost:2019",
|
||||
"cert_dir": "/etc/caddy/certs",
|
||||
"cert_file": "site.crt",
|
||||
"key_file": "site.key"
|
||||
}
|
||||
```
|
||||
|
||||
When `mode` is `"api"`, the connector posts the certificate to the admin API endpoint. When `mode` is `"file"`, it writes files to `cert_dir` (same pattern as Traefik). The `admin_api` field is ignored in file mode.
|
||||
|
||||
Location: `internal/connector/target/caddy/caddy.go`
|
||||
|
||||
### F5 BIG-IP (Interface Only)
|
||||
|
||||
The F5 BIG-IP target connector interface is defined with the iControl REST flow mapped out, but the actual API calls are not yet implemented. F5 appliances can't run agents directly, so this connector uses the **proxy agent pattern**: a designated agent in the same network zone picks up F5 deployment jobs and calls the iControl REST API. The server assigns the work; the proxy agent executes it.
|
||||
|
||||
The planned flow is: authenticate via `POST /mgmt/shared/authn/login`, upload cert PEM via `POST /mgmt/tm/ltm/certificate`, update the SSL profile via `PATCH /mgmt/tm/ltm/profile/client-ssl/{profile}`, and validate deployment by checking profile status.
|
||||
|
||||
Configuration (defined, not yet functional):
|
||||
```json
|
||||
@@ -295,24 +564,33 @@ Configuration (defined, not yet functional):
|
||||
}
|
||||
```
|
||||
|
||||
Note: F5 credentials are stored on the proxy agent, not on the control plane server. This limits the credential blast radius to the proxy agent's network zone.
|
||||
|
||||
Location: `internal/connector/target/f5/f5.go`
|
||||
|
||||
### Planned: IIS (Interface Only)
|
||||
### IIS (Interface Only, Dual-Mode)
|
||||
|
||||
The IIS target connector interface is built with the WinRM/PowerShell flow mapped out, but the actual remote execution is not yet implemented. The planned flow is: transfer a PFX bundle to the Windows server via WinRM, run `Import-PfxCertificate` to install it into the certificate store, and run `Set-WebBinding` to bind the certificate to the IIS site. Implementation is planned for V2.
|
||||
The IIS target connector supports two planned deployment modes:
|
||||
|
||||
**Agent-local (recommended):** A Windows agent runs directly on the IIS server and deploys certificates using PowerShell — `Import-PfxCertificate` to install into the certificate store and `Set-WebBinding` to bind to the IIS site. This is the preferred approach: no remote access needed, no credential management, same pull-based model as NGINX/Apache/HAProxy.
|
||||
|
||||
**Proxy agent WinRM (for agentless targets):** For Windows servers where you don't want to install an agent, a nearby Windows agent acts as a proxy and reaches the IIS box via WinRM. The proxy agent picks up the deployment job, transfers the PFX bundle over WinRM, and runs the PowerShell commands remotely. WinRM credentials are stored on the proxy agent, not on the control plane.
|
||||
|
||||
Configuration (defined, not yet functional):
|
||||
```json
|
||||
{
|
||||
"host": "iis-server.internal.example.com",
|
||||
"username": "Administrator",
|
||||
"password": "...",
|
||||
"mode": "local",
|
||||
"site_name": "Default Web Site",
|
||||
"cert_store": "WebHosting",
|
||||
"use_https": true
|
||||
"winrm_host": "",
|
||||
"winrm_username": "",
|
||||
"winrm_password": "",
|
||||
"winrm_use_https": true
|
||||
}
|
||||
```
|
||||
|
||||
When `mode` is `"local"`, the `winrm_*` fields are ignored. When `mode` is `"proxy"`, the agent connects to the remote IIS server via WinRM using the provided credentials.
|
||||
|
||||
Location: `internal/connector/target/iis/iis.go`
|
||||
|
||||
## Notifier Connector
|
||||
@@ -344,7 +622,76 @@ type Connector interface {
|
||||
}
|
||||
```
|
||||
|
||||
Built-in notifiers: **Email** (SMTP) and **Webhook** (HTTP POST).
|
||||
Built-in notifiers: **Email** (SMTP), **Webhook** (HTTP POST), **Slack** (incoming webhook), **Microsoft Teams** (MessageCard webhook), **PagerDuty** (Events API v2), and **OpsGenie** (Alert API v2).
|
||||
|
||||
### Email (SMTP) Notifier
|
||||
|
||||
The Email notifier sends transactional alerts and scheduled digests via SMTP. It bridges the connector-layer SMTP connector to the service-layer `Notifier` interface via the `NotifierAdapter`. Supports both plain text and HTML emails.
|
||||
|
||||
Configuration:
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `CERTCTL_SMTP_HOST` | — | SMTP server hostname (required to enable) |
|
||||
| `CERTCTL_SMTP_PORT` | 587 | SMTP port (TLS) |
|
||||
| `CERTCTL_SMTP_USERNAME` | — | SMTP authentication username (optional) |
|
||||
| `CERTCTL_SMTP_PASSWORD` | — | SMTP authentication password (optional) |
|
||||
| `CERTCTL_SMTP_FROM_ADDRESS` | — | Email from address (required) |
|
||||
| `CERTCTL_SMTP_USE_TLS` | true | Enable TLS encryption |
|
||||
|
||||
Example:
|
||||
```bash
|
||||
export CERTCTL_SMTP_HOST=smtp.gmail.com
|
||||
export CERTCTL_SMTP_PORT=587
|
||||
export CERTCTL_SMTP_USERNAME=admin@example.com
|
||||
export CERTCTL_SMTP_PASSWORD=app-password-123
|
||||
export CERTCTL_SMTP_FROM_ADDRESS=certctl@example.com
|
||||
```
|
||||
|
||||
### Scheduled Certificate Digest
|
||||
|
||||
The `DigestService` generates aggregated certificate digest emails and sends them on a configurable schedule. This is useful for periodic briefings on certificate inventory health — expiring certs, status summary, active agents, job trends.
|
||||
|
||||
The digest HTML template includes:
|
||||
- Total certificates, expiring soon, expired, active agents (stats grid)
|
||||
- Jobs completed/failed summary (30 days)
|
||||
- Expiring certificates table (color-coded by urgency: 7d, 14d, 30d)
|
||||
- Auto-refresh and responsive email layout
|
||||
|
||||
**Scheduler Integration:** The 7th scheduler loop runs on a configurable interval (default 24 hours). It does NOT run on startup — it waits for the first scheduled tick. The operation timeout is 5 minutes. Each loop execution is guarded by a `sync/atomic.Bool` idempotency flag.
|
||||
|
||||
Configuration:
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `CERTCTL_DIGEST_ENABLED` | false | Enable scheduled digest emails |
|
||||
| `CERTCTL_DIGEST_INTERVAL` | 24h | How often to send the digest (a Go duration string, e.g. `12h`, or `168h` for 7 days) |
|
||||
| `CERTCTL_DIGEST_RECIPIENTS` | — | Comma-separated email addresses. Falls back to certificate owner emails if empty |
|
||||
|
||||
API Endpoints:
|
||||
|
||||
- **`GET /api/v1/digest/preview`** — Render digest HTML for preview (no email sent)
|
||||
- **`POST /api/v1/digest/send`** — Trigger digest send immediately (outside of schedule)
|
||||
|
||||
Example:
|
||||
```bash
|
||||
# Preview digest
|
||||
curl http://localhost:8443/api/v1/digest/preview | jq '.html'
|
||||
|
||||
# Send digest immediately
|
||||
curl -X POST http://localhost:8443/api/v1/digest/send
|
||||
```
|
||||
|
||||
Each notifier is enabled by its configuration env var:
|
||||
|
||||
| Notifier | Env Var | Description |
|
||||
|----------|---------|-------------|
|
||||
| Email | `CERTCTL_SMTP_HOST` | SMTP email delivery. See Email Notifier section above |
|
||||
| Webhook | `CERTCTL_WEBHOOK_URL` | HTTP POST to any endpoint. Optional: `CERTCTL_WEBHOOK_SECRET` for HMAC signing |
|
||||
| Slack | `CERTCTL_SLACK_WEBHOOK_URL` | Incoming webhook URL. Optional: `CERTCTL_SLACK_CHANNEL`, `CERTCTL_SLACK_USERNAME` |
|
||||
| Teams | `CERTCTL_TEAMS_WEBHOOK_URL` | Incoming webhook URL (MessageCard format) |
|
||||
| PagerDuty | `CERTCTL_PAGERDUTY_ROUTING_KEY` | Events API v2 routing key. Optional: `CERTCTL_PAGERDUTY_SEVERITY` (default: "warning") |
|
||||
| OpsGenie | `CERTCTL_OPSGENIE_API_KEY` | Alert API GenieKey. Optional: `CERTCTL_OPSGENIE_PRIORITY` (default: "P3") |
|
||||
|
||||
In demo mode, notifications are marked as "sent" even without a configured notifier — this prevents error spam in the logs while still generating notification records for the dashboard to display.
|
||||
|
||||
@@ -448,6 +795,142 @@ docker rm -f nginx
|
||||
6. **Idempotent operations** — Deploying the same certificate twice should succeed, not fail
|
||||
7. **Report metadata** — Return deployment duration, target address, and other useful data in results
|
||||
|
||||
## Agent Discovery Scanner
|
||||
|
||||
Agents include a built-in certificate discovery scanner that walks configured directories and reports unmanaged certificates to the control plane. This is useful for discovering existing certificates already deployed in your infrastructure, so you can bring them under certctl's management.
|
||||
|
||||
### Configuration
|
||||
|
||||
Enable discovery on an agent by setting `CERTCTL_DISCOVERY_DIRS` to a comma-separated list of directories:
|
||||
|
||||
```bash
|
||||
export CERTCTL_DISCOVERY_DIRS="/etc/nginx/certs,/etc/ssl/certs,/etc/apache2/ssl"
|
||||
```
|
||||
|
||||
Or via command-line flag:
|
||||
|
||||
```bash
|
||||
./agent --agent-id agent-nginx-01 --discovery-dirs "/etc/nginx/certs,/etc/ssl/certs"
|
||||
```
|
||||
|
||||
The agent scans these directories on startup and every 6 hours, looking for certificate files in PEM or DER format (extensions: `.pem`, `.crt`, `.cer`, `.cert`, `.der`).
|
||||
|
||||
### How It Works
|
||||
|
||||
1. **Scan**: Agent recursively walks directories, extracts certificates
|
||||
2. **Deduplicate**: Control plane deduplicates by SHA-256 fingerprint (same cert in multiple locations is one discovery)
|
||||
3. **Store**: Discovered certificates stored with metadata (agent ID, file path, found date, fingerprint)
|
||||
4. **Triage**: Operators review discovered certs in the **Discovery** dashboard page (or via API) — claim to link to managed certificates, or dismiss false positives. The dashboard shows summary stats, filters by status and agent, and provides one-click claim/dismiss actions.
|
||||
|
||||
### API Endpoints
|
||||
|
||||
```bash
|
||||
# List discovered certificates (filter by agent, status)
|
||||
curl -s "http://localhost:8443/api/v1/discovered-certificates?agent_id=agent-nginx-01&status=new" | jq .
|
||||
|
||||
# Get discovery detail
|
||||
curl -s http://localhost:8443/api/v1/discovered-certificates/DISCOVERY_ID | jq .
|
||||
|
||||
# Claim a discovered cert (link to managed certificate)
|
||||
curl -s -X POST http://localhost:8443/api/v1/discovered-certificates/DISCOVERY_ID/claim \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"managed_certificate_id": "mc-api-prod"}' | jq .
|
||||
|
||||
# Dismiss a discovery
|
||||
curl -s -X POST http://localhost:8443/api/v1/discovered-certificates/DISCOVERY_ID/dismiss | jq .
|
||||
|
||||
# View discovery scan history
|
||||
curl -s http://localhost:8443/api/v1/discovery-scans | jq .
|
||||
|
||||
# Summary counts (new, claimed, dismissed)
|
||||
curl -s http://localhost:8443/api/v1/discovery-summary | jq .
|
||||
```
|
||||
|
||||
### Use Cases
|
||||
|
||||
- **Inventory audit** — Find all TLS certificates running in your infrastructure
|
||||
- **Migration** — Onboard existing certificates that were issued outside certctl
|
||||
- **Compliance** — Detect rogue/unauthorized certificates in monitored directories
|
||||
- **Integration** — Pull certificate data from systems that pre-generate certs (e.g., cert-manager on Kubernetes)
|
||||
|
||||
## Network Certificate Scanner (M21)
|
||||
|
||||
The control plane includes a built-in active TLS scanner that probes network endpoints and discovers certificates without requiring agent deployment. This complements the agent-based filesystem discovery with network-level visibility.
|
||||
|
||||
### Configuration
|
||||
|
||||
Enable network scanning on the server:
|
||||
|
||||
```bash
|
||||
export CERTCTL_NETWORK_SCAN_ENABLED=true
|
||||
export CERTCTL_NETWORK_SCAN_INTERVAL=6h # default
|
||||
```
|
||||
|
||||
### Creating Scan Targets
|
||||
|
||||
Network scan targets can be managed from the **Network Scans** dashboard page (create, edit, enable/disable, trigger on-demand scans) or via the API. Targets define which CIDR ranges and ports to probe:
|
||||
|
||||
```bash
|
||||
# Create a scan target for your internal network (or use the dashboard's "+ New Target" button)
|
||||
curl -s -X POST http://localhost:8443/api/v1/network-scan-targets \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"name": "Production Web Servers",
|
||||
"cidrs": ["10.0.1.0/24", "10.0.2.0/24"],
|
||||
"ports": [443, 8443, 6443],
|
||||
"enabled": true,
|
||||
"scan_interval_hours": 6,
|
||||
"timeout_ms": 5000
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
### How It Works
|
||||
|
||||
1. **Expand**: CIDR ranges are expanded to individual IPs (safety cap at /20 = 4096 IPs)
|
||||
2. **Probe**: Concurrent TLS connections (50 goroutines) with configurable timeout per endpoint
|
||||
3. **Extract**: Certificate metadata extracted from TLS handshake (CN, SANs, serial, issuer, key info, fingerprint)
|
||||
4. **Pipeline**: Results fed into the same `DiscoveryService.ProcessDiscoveryReport()` as filesystem discovery
|
||||
5. **Deduplicate**: The sentinel agent ID (`server-scanner`), combined with `source_path` set to `ip:port`, ensures proper deduplication
|
||||
6. **Triage**: Discovered certs appear in the **Discovery** dashboard page (and via `GET /api/v1/discovered-certificates`) with `agent_id=server-scanner`
|
||||
|
||||
### API Endpoints
|
||||
|
||||
```bash
|
||||
# List all scan targets
|
||||
curl -s http://localhost:8443/api/v1/network-scan-targets | jq .
|
||||
|
||||
# Create a scan target
|
||||
curl -s -X POST http://localhost:8443/api/v1/network-scan-targets \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"name": "DMZ", "cidrs": ["172.16.0.0/24"], "ports": [443]}' | jq .
|
||||
|
||||
# Get a specific target (includes last_scan_at, last_scan_certs_found)
|
||||
curl -s http://localhost:8443/api/v1/network-scan-targets/nst-dmz | jq .
|
||||
|
||||
# Trigger an immediate scan (doesn't wait for scheduler)
|
||||
curl -s -X POST http://localhost:8443/api/v1/network-scan-targets/nst-dmz/scan | jq .
|
||||
|
||||
# Update scan configuration
|
||||
curl -s -X PUT http://localhost:8443/api/v1/network-scan-targets/nst-dmz \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"ports": [443, 8443, 9443], "timeout_ms": 3000}' | jq .
|
||||
|
||||
# Delete a scan target
|
||||
curl -s -X DELETE http://localhost:8443/api/v1/network-scan-targets/nst-dmz
|
||||
```
|
||||
|
||||
### Scheduler Integration
|
||||
|
||||
When `CERTCTL_NETWORK_SCAN_ENABLED=true`, the server runs a 6th scheduler loop (alongside renewal, jobs, health, notifications, and short-lived expiry). It scans all enabled targets at the configured interval (default 6h). Each target tracks `last_scan_at`, `last_scan_duration_ms`, and `last_scan_certs_found` for monitoring scan health.
|
||||
|
||||
### Use Cases
|
||||
|
||||
- **Network inventory** — "What TLS certs are deployed across my network?" without deploying agents
|
||||
- **Shadow certificate detection** — Find certificates on services you didn't know were running TLS
|
||||
- **Compliance scanning** — Prove to auditors that all TLS endpoints are inventoried
|
||||
- **Migration assessment** — Scan a network range before onboarding to certctl management
|
||||
- **Expiration monitoring** — Discover soon-to-expire certs on network endpoints before they cause outages
|
||||
|
||||
## What's Next
|
||||
|
||||
- [Architecture Guide](architecture.md) — Understanding the full system design
|
||||
|
||||
@@ -5,6 +5,41 @@ This demo goes beyond browsing pre-loaded data. You'll create a team, register a
|
||||
**Time**: 15-20 minutes
|
||||
**Prerequisites**: certctl running via Docker Compose (see [Quick Start](quickstart.md))
|
||||
|
||||
## Contents
|
||||
|
||||
1. [Setup](#setup)
|
||||
2. [How the pieces fit together](#how-the-pieces-fit-together)
|
||||
3. [Alternative Issuers Reference](#alternative-issuers-reference)
|
||||
- [Sub-CA Mode](#sub-ca-mode-local-ca-chained-to-enterprise-root)
|
||||
- [ACME with ZeroSSL](#acme-with-zerossl-auto-eab)
|
||||
- [ACME with DNS-01 Challenges](#acme-with-dns-01-challenges-wildcard-certificates)
|
||||
- [ACME with DNS-PERSIST-01](#acme-with-dns-persist-01-zero-touch-renewals)
|
||||
- [step-ca (Smallstep Private CA)](#step-ca-smallstep-private-ca)
|
||||
- [OpenSSL / Custom CA](#openssl--custom-ca-script-based)
|
||||
4. [Part 1: Build the Organization Structure](#part-1-build-the-organization-structure)
|
||||
5. [Part 2: Verify the Issuer](#part-2-verify-the-issuer)
|
||||
6. [Part 3: Create a Managed Certificate](#part-3-create-a-managed-certificate)
|
||||
7. [Part 4: Trigger Certificate Renewal](#part-4-trigger-certificate-renewal)
|
||||
8. [Part 4.5: Manage Deployment Targets](#part-45-manage-deployment-targets)
|
||||
9. [Part 5: Deploy the Certificate](#part-5-deploy-the-certificate)
|
||||
10. [Part 6: View the Audit Trail](#part-6-view-the-audit-trail-immutable-api-audit-log)
|
||||
11. [Part 7: Check Notifications](#part-7-check-notifications)
|
||||
12. [Part 8: Create a Second Certificate and Compare](#part-8-create-a-second-certificate-and-compare)
|
||||
13. [Part 8.5: Revoke a Certificate](#part-85-revoke-a-certificate)
|
||||
14. [Part 9: Policy Violations](#part-9-policy-violations)
|
||||
15. [Part 9.5: Dashboard Stats and Metrics](#part-95-dashboard-stats-and-metrics)
|
||||
16. [Part 10: Certificate Profiles](#part-10-certificate-profiles)
|
||||
17. [Part 11: Agent Groups](#part-11-agent-groups)
|
||||
18. [Part 12: Interactive Approval Workflow](#part-12-interactive-approval-workflow)
|
||||
19. [Part 13: Advanced Query Features](#part-13-advanced-query-features)
|
||||
20. [Part 14: CLI Tool](#part-14-cli-tool-m16b)
|
||||
21. [Part 15: MCP Server for AI Integration](#part-15-mcp-server-for-ai-integration-m18a)
|
||||
22. [Part 16: Certificate Discovery](#part-16-certificate-discovery-m18b--m21)
|
||||
23. [End-to-End Architecture Summary](#end-to-end-architecture-summary)
|
||||
24. [Full Automated Script](#full-automated-script)
|
||||
25. [What to Show Stakeholders](#what-to-show-stakeholders)
|
||||
26. [Teardown](#teardown)
|
||||
|
||||
## Setup
|
||||
|
||||
Make sure certctl is running:
|
||||
@@ -33,13 +68,154 @@ flowchart LR
|
||||
B --> C[Create\nCertificate]
|
||||
C --> D[Trigger\nRenewal]
|
||||
D --> E[Trigger\nDeployment]
|
||||
E --> F[Inspect Audit\n& Notifications]
|
||||
E --> F[Revoke a\nCertificate]
|
||||
F --> G[Check Stats\n& Metrics]
|
||||
G --> H[Inspect Audit\n& Notifications]
|
||||
```
|
||||
|
||||
Each step corresponds to a real operation that certctl would perform in production. The difference here is that we're driving each step manually via curl instead of letting the scheduler and agents handle it automatically.
|
||||
|
||||
---
|
||||
|
||||
## Alternative Issuers Reference
|
||||
|
||||
certctl ships with multiple issuer connectors. The demo uses the Local CA, but here's how to set up others:
|
||||
|
||||
### Sub-CA Mode (Local CA chained to enterprise root)
|
||||
|
||||
For enterprises with ADCS, root CAs, or intermediate CAs:
|
||||
|
||||
```bash
|
||||
# Place your CA certificate and key on the server
|
||||
export CERTCTL_CA_CERT_PATH="/etc/certctl/ca-cert.pem"
|
||||
export CERTCTL_CA_KEY_PATH="/etc/certctl/ca-key.pem"
|
||||
|
||||
# Restart the server. The Local CA connector loads the cert+key from disk
|
||||
# All issued certificates now chain to your enterprise root
|
||||
docker compose -f deploy/docker-compose.yml restart server
|
||||
```
|
||||
|
||||
The CA key can be an RSA or ECDSA private key, including keys encoded in PKCS#8 format. The connector validates that the certificate has `IsCA=true` and `KeyUsageCertSign`.
|
||||
|
||||
### ACME with ZeroSSL (Auto-EAB)
|
||||
|
||||
ZeroSSL is a free ACME CA that requires External Account Binding (EAB) for account registration. certctl auto-fetches EAB credentials from ZeroSSL's public API when the directory URL is detected as ZeroSSL and no EAB credentials are provided — you just need an email address:
|
||||
|
||||
```bash
|
||||
# Minimal config — certctl auto-fetches EAB credentials from ZeroSSL
|
||||
export CERTCTL_ACME_DIRECTORY_URL="https://acme.zerossl.com/v2/DV90"
|
||||
export CERTCTL_ACME_EMAIL="ops@example.com"
|
||||
```
|
||||
|
||||
No dashboard visit, no manual EAB credential copy-paste. certctl calls `api.zerossl.com/acme/eab-credentials-email` with your email, gets back a KID + HMAC key, and uses them for ACME account registration automatically.
|
||||
|
||||
If you already have EAB credentials (e.g., from the ZeroSSL dashboard or for other CAs like Google Trust Services or SSL.com), you can provide them explicitly:
|
||||
|
||||
```bash
|
||||
export CERTCTL_ACME_DIRECTORY_URL="https://acme.zerossl.com/v2/DV90"
|
||||
export CERTCTL_ACME_EMAIL="ops@example.com"
|
||||
export CERTCTL_ACME_EAB_KID="your-key-id"
|
||||
export CERTCTL_ACME_EAB_HMAC="your-base64url-hmac-key"
|
||||
```
|
||||
|
||||
### ACME with DNS-01 Challenges (Wildcard Certificates)
|
||||
|
||||
For Let's Encrypt or other ACME providers with wildcard support:
|
||||
|
||||
```bash
|
||||
# Configure ACME DNS-01 with a DNS provider script
|
||||
export CERTCTL_ACME_CHALLENGE_TYPE="dns-01"
|
||||
export CERTCTL_ACME_DNS_PRESENT_SCRIPT="/usr/local/bin/dns-present.sh"
|
||||
export CERTCTL_ACME_DNS_CLEANUP_SCRIPT="/usr/local/bin/dns-cleanup.sh"
|
||||
export CERTCTL_ACME_DNS_PROPAGATION_WAIT="10" # seconds to wait for DNS propagation
|
||||
|
||||
# Example dns-present.sh for Cloudflare:
|
||||
# #!/bin/bash
|
||||
# RECORD_NAME=$1
|
||||
# RECORD_VALUE=$2
|
||||
# curl -X POST "https://api.cloudflare.com/client/v4/zones/ZONE_ID/dns_records" \
|
||||
# -H "Authorization: Bearer $CLOUDFLARE_API_TOKEN" \
|
||||
# -d "{\"type\":\"TXT\",\"name\":\"$RECORD_NAME\",\"content\":\"$RECORD_VALUE\"}"
|
||||
```
|
||||
|
||||
Then issue wildcard certificates:
|
||||
```bash
|
||||
curl -s -X POST $API/api/v1/certificates \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"id": "mc-wildcard-api",
|
||||
"name": "Wildcard API Certificate",
|
||||
"common_name": "*.api.example.com",
|
||||
"sans": ["*.api.example.com", "api.example.com"],
|
||||
"issuer_id": "iss-acme",
|
||||
"renewal_policy_id": "rp-default",
|
||||
"status": "Pending"
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
### ACME with DNS-PERSIST-01 (Zero-Touch Renewals)
|
||||
|
||||
DNS-PERSIST-01 uses a standing `_validation-persist` TXT record that you set once. The CA revalidates it on every renewal — no per-renewal DNS updates, no cleanup scripts, no propagation waits. If the CA doesn't support DNS-PERSIST-01 yet, certctl falls back to DNS-01 automatically.
|
||||
|
||||
```bash
|
||||
# Configure ACME DNS-PERSIST-01
|
||||
export CERTCTL_ACME_CHALLENGE_TYPE="dns-persist-01"
|
||||
export CERTCTL_ACME_DNS_PRESENT_SCRIPT="/usr/local/bin/dns-present.sh"
|
||||
export CERTCTL_ACME_DNS_PERSIST_ISSUER_DOMAIN="letsencrypt.org"
|
||||
|
||||
# The present script creates a _validation-persist.<domain> TXT record with value:
|
||||
# "letsencrypt.org; accounturi=https://acme-v02.api.letsencrypt.org/acme/acct/12345"
|
||||
# This record is set once and never touched again.
|
||||
```
|
||||
|
||||
### step-ca (Smallstep Private CA)
|
||||
|
||||
For organizations running step-ca as their private CA:
|
||||
|
||||
```bash
|
||||
# Configure step-ca connector
|
||||
export CERTCTL_STEPCA_URL="https://ca.internal.example.com"
|
||||
export CERTCTL_STEPCA_FINGERPRINT="your-ca-fingerprint" # From `step ca bootstrap`
|
||||
export CERTCTL_STEPCA_PROVISIONER="certctl-admin" # Name of the JWK provisioner
|
||||
export CERTCTL_STEPCA_PROVISIONER_JWK="/etc/certctl/provisioner.json" # Path to JWK private key
|
||||
```
|
||||
|
||||
Then use step-ca as the issuer:
|
||||
```bash
|
||||
curl -s -X POST $API/api/v1/certificates \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"id": "mc-stepca-cert",
|
||||
"name": "Certificate from step-ca",
|
||||
"common_name": "service.internal.example.com",
|
||||
"issuer_id": "iss-stepca",
|
||||
"renewal_policy_id": "rp-default",
|
||||
"status": "Pending"
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
### OpenSSL / Custom CA (Script-based)
|
||||
|
||||
For custom signing workflows via shell scripts:
|
||||
|
||||
```bash
|
||||
# Configure OpenSSL connector with user-provided scripts
|
||||
export CERTCTL_OPENSSL_SIGN_SCRIPT="/usr/local/bin/custom-sign.sh"
|
||||
export CERTCTL_OPENSSL_REVOKE_SCRIPT="/usr/local/bin/custom-revoke.sh"
|
||||
export CERTCTL_OPENSSL_CRL_SCRIPT="/usr/local/bin/custom-crl.sh"
|
||||
export CERTCTL_OPENSSL_TIMEOUT_SECONDS="30"
|
||||
|
||||
# Example custom-sign.sh:
|
||||
# #!/bin/bash
|
||||
# CSR_PEM=$1
|
||||
# VALIDITY_DAYS=$2
|
||||
# # Do something custom with the CSR and return signed certificate
|
||||
# openssl ca -in <(echo "$CSR_PEM") -days $VALIDITY_DAYS -out /tmp/signed.pem
|
||||
# cat /tmp/signed.pem
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Part 1: Build the Organization Structure
|
||||
|
||||
### Create a new team
|
||||
@@ -99,12 +275,12 @@ You should see:
|
||||
{
|
||||
"id": "iss-local",
|
||||
"name": "Local Dev CA",
|
||||
"type": "GenericCA",
|
||||
"type": "local",
|
||||
"enabled": true
|
||||
}
|
||||
```
|
||||
|
||||
**How it works:** The issuer record was inserted during database seeding (`migrations/seed_demo.sql`). The `type` field (`GenericCA`) maps to a connector implementation. When the server starts, it registers connector instances in an `issuerRegistry` map keyed by issuer ID. When a certificate needs issuance, the service layer looks up the issuer ID in this registry to find the right connector.
|
||||
**How it works:** The issuer record was inserted during database seeding (`migrations/seed_demo.sql`). The `type` field (`local`) maps to a connector implementation. When the server starts, it registers connector instances in an `issuerRegistry` map keyed by issuer ID. When a certificate needs issuance, the service layer looks up the issuer ID in this registry to find the right connector.
|
||||
|
||||
**How the Local CA works internally:** The Local CA connector (`internal/connector/issuer/local/local.go`) generates a self-signed root CA certificate on first use using Go's `crypto/x509` package. The CA key pair lives in memory only — it's regenerated each time the server restarts, which means all certificates it issued become untrusted on restart (acceptable for dev/demo). When it receives an `IssuanceRequest` containing a CSR (Certificate Signing Request), it:
|
||||
|
||||
@@ -116,7 +292,7 @@ You should see:
|
||||
|
||||
The result is a structurally valid X.509 certificate — browsers won't trust it (no root CA in their trust store), but it exercises the exact same code paths that a production ACME or Vault issuer would.
|
||||
|
||||
**Why pluggable issuers:** Different organizations use different CAs. Some use Let's Encrypt (ACME protocol), some use step-ca or internal PKI (Vault, ADCS), some use commercial CAs (DigiCert, Entrust, GlobalSign), and some have custom OpenSSL-based workflows. The connector interface means certctl doesn't care — it calls `IssueCertificate()` and gets back a signed cert regardless of the backend. V1 ships with Local CA and ACME (HTTP-01); step-ca, ADCS, OpenSSL/custom CA are planned for V2; DigiCert, Vault PKI, Entrust, GlobalSign, Google CAS, and EJBCA are planned for V3.
|
||||
**Why pluggable issuers:** Different organizations use different CAs. Some use Let's Encrypt (ACME protocol), some use step-ca or internal PKI (Vault), some use commercial CAs (DigiCert, Entrust, GlobalSign), and some have custom OpenSSL-based workflows. For enterprises with ADCS, certctl can operate as a sub-CA — all issued certs chain to the enterprise root. The connector interface means certctl doesn't care — it calls `IssueCertificate()` and gets back a signed cert regardless of the backend. V1 ships with Local CA (self-signed or sub-CA), ACME (HTTP-01 + DNS-01 + DNS-PERSIST-01 for wildcards), and step-ca (Smallstep private CA via native /sign API). V2 adds the OpenSSL/Custom CA connector (script-based signing). DigiCert, Vault PKI, Entrust, GlobalSign, Google CAS, and EJBCA are planned for V3+.
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
@@ -127,15 +303,14 @@ flowchart TD
|
||||
D["GetOrderStatus(orderID)"]
|
||||
end
|
||||
|
||||
A --> E["Local CA\n(crypto/x509)"]
|
||||
A --> E["Local CA\n(self-signed or sub-CA)"]
|
||||
A --> F["ACME\n(Let's Encrypt)"]
|
||||
A --> G["step-ca\n(planned V2)"]
|
||||
A --> H["OpenSSL / Custom CA\n(planned V2)"]
|
||||
A --> I["ADCS\n(planned V2)"]
|
||||
A --> J["DigiCert API\n(planned V2.3)"]
|
||||
A --> K["Vault PKI\n(planned V3)"]
|
||||
A --> L["Entrust / GlobalSign\n(planned V3)"]
|
||||
A --> M["Google CAS / EJBCA\n(planned V3)"]
|
||||
A --> G["step-ca\n(implemented)"]
|
||||
A --> H["OpenSSL / Custom CA\n(script-based)"]
|
||||
A --> J["DigiCert API\n(planned)"]
|
||||
A --> K["Vault PKI\n(planned)"]
|
||||
A --> L["Entrust / GlobalSign\n(planned)"]
|
||||
A --> M["Google CAS / EJBCA\n(planned)"]
|
||||
```
|
||||
|
||||
---
|
||||
@@ -268,6 +443,39 @@ curl -s "$API/api/v1/jobs" | jq '.data[] | select(.certificate_id == "mc-demo-ap
|
||||
|
||||
---
|
||||
|
||||
## Part 4.5: Manage Deployment Targets
|
||||
|
||||
Before deploying, you need targets. The demo seeds 5 targets, but you can also create, update, and delete them via API:
|
||||
|
||||
```bash
|
||||
# List all targets
|
||||
curl -s "$API/api/v1/targets" | jq '.data[] | {id, name, type, agent_id}'
|
||||
|
||||
# Create a new NGINX target
|
||||
curl -s -X POST "$API/api/v1/targets" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"id": "tgt-nginx-api",
|
||||
"name": "API NGINX",
|
||||
"type": "nginx",
|
||||
"agent_id": "ag-web-prod",
|
||||
"config": {"cert_path": "/etc/nginx/certs/api.crt", "key_path": "/etc/nginx/certs/api.key", "reload_command": "systemctl reload nginx"},
|
||||
"enabled": true
|
||||
}' | jq .
|
||||
|
||||
# Update a target
|
||||
curl -s -X PUT "$API/api/v1/targets/tgt-nginx-api" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"name": "API NGINX (updated)", "type": "nginx", "agent_id": "ag-web-prod", "config": {"cert_path": "/etc/nginx/certs/api.crt"}, "enabled": true}' | jq .
|
||||
|
||||
# Delete a target
|
||||
curl -s -X DELETE "$API/api/v1/targets/tgt-nginx-api"
|
||||
```
|
||||
|
||||
Each target type (NGINX, Apache, HAProxy, F5, IIS) accepts different configuration fields. The `config` JSON is validated at deployment time by the target connector.
|
||||
|
||||
---
|
||||
|
||||
## Part 5: Deploy the Certificate
|
||||
|
||||
Trigger deployment to see the deployment workflow:
|
||||
@@ -308,13 +516,13 @@ sequenceDiagram
|
||||
TC->>T: Run: nginx -t (validate config)
|
||||
TC->>T: Run: systemctl reload nginx
|
||||
TC-->>A: {success: true, deployed_at: "..."}
|
||||
else F5 Target
|
||||
TC->>T: POST /mgmt/tm/sys/crypto/cert (upload cert)
|
||||
TC->>T: PUT /mgmt/tm/ltm/virtual (bind to virtual server)
|
||||
else F5 Target (via proxy agent)
|
||||
TC->>T: iControl REST: POST /mgmt/tm/sys/crypto/cert
|
||||
TC->>T: iControl REST: PUT /mgmt/tm/ltm/virtual
|
||||
TC-->>A: {success: true, deployed_at: "..."}
|
||||
else IIS Target
|
||||
TC->>T: WinRM: Import-PfxCertificate
|
||||
TC->>T: WinRM: Set-WebBinding -SslFlags
|
||||
else IIS Target (agent-local)
|
||||
TC->>T: PowerShell: Import-PfxCertificate
|
||||
TC->>T: PowerShell: Set-WebBinding -SslFlags
|
||||
TC-->>A: {success: true, deployed_at: "..."}
|
||||
end
|
||||
|
||||
@@ -335,14 +543,14 @@ In production, agents poll for work and report results. You can simulate this ma
|
||||
|
||||
```bash
|
||||
# Poll for pending deployment work (as an agent)
|
||||
curl -s "$API/api/v1/agents/agent-nginx-prod/work" | jq .
|
||||
curl -s "$API/api/v1/agents/ag-web-prod/work" | jq .
|
||||
```
|
||||
|
||||
This returns pending deployment jobs assigned to the agent. The agent would then fetch the certificate, deploy it, and report back:
|
||||
|
||||
```bash
|
||||
# Report job completion (replace JOB_ID with an actual job ID from the work response)
|
||||
curl -s -X POST "$API/api/v1/agents/agent-nginx-prod/jobs/JOB_ID/status" \
|
||||
curl -s -X POST "$API/api/v1/agents/ag-web-prod/jobs/JOB_ID/status" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"status": "Completed",
|
||||
@@ -354,29 +562,47 @@ curl -s -X POST "$API/api/v1/agents/agent-nginx-prod/jobs/JOB_ID/status" \
|
||||
|
||||
---
|
||||
|
||||
## Part 6: View the Audit Trail
|
||||
## Part 6: View the Audit Trail (Immutable API Audit Log)
|
||||
|
||||
Every action you've taken has been recorded. Check the audit trail:
|
||||
Every API call and state change is recorded in an immutable, append-only audit trail. Check the recent audit events:
|
||||
|
||||
```bash
|
||||
curl -s $API/api/v1/audit | jq '.data[0:5]'
|
||||
# List recent audit events
|
||||
curl -s $API/api/v1/audit | jq '.data[0:10]'
|
||||
|
||||
# Filter by action (e.g., all certificate creations)
|
||||
curl -s "$API/api/v1/audit?action=certificate_created" | jq '.data[] | {actor, action, resource_id, timestamp}'
|
||||
|
||||
# Filter by resource (e.g., all actions on mc-demo-api)
|
||||
curl -s "$API/api/v1/audit?resource_id=mc-demo-api" | jq '.data[] | {actor, action, timestamp}'
|
||||
|
||||
# Filter by actor (e.g., all actions by a specific owner)
|
||||
curl -s "$API/api/v1/audit?actor=o-demo-user" | jq '.data[] | {action, resource_type, timestamp}'
|
||||
|
||||
# Time-range filter (e.g., last hour)
|
||||
curl -s "$API/api/v1/audit?created_after=2026-03-24T09:00:00Z" | jq '.data | length'
|
||||
|
||||
# Export audit trail (CSV format via GUI)
|
||||
# Available on the Audit page with applied filters
|
||||
```
|
||||
|
||||
**How it works:** The `audit_events` table is append-only — there is no `UPDATE` or `DELETE` in the `AuditRepository` interface. This is a deliberate design decision for compliance. Every service method that mutates state calls `AuditService.Create()` with:
|
||||
**How it works:** The `audit_events` table is append-only — there is no `UPDATE` or `DELETE` in the `AuditRepository` interface. Every API call (including this audit query) is recorded by the API audit middleware with:
|
||||
|
||||
| Field | Source | Example |
|
||||
|-------|--------|---------|
|
||||
| `actor` | The authenticated user or system component | `"o-demo-user"`, `"system"`, `"agent-prod-01"` |
|
||||
| `actor` | The authenticated user extracted from auth context | `"o-demo-user"`, `"system"`, `"agent-prod-01"`, `"anonymous"` |
|
||||
| `actor_type` | Category of the actor | `"User"`, `"System"`, `"Agent"` |
|
||||
| `action` | What happened | `"certificate_created"`, `"renewal_triggered"`, `"deployment_completed"` |
|
||||
| `resource_type` | What was affected | `"certificate"`, `"team"`, `"agent"` |
|
||||
| `action` | What happened | `"certificate_created"`, `"renewal_triggered"`, `"deployment_completed"`, `"api_call"` |
|
||||
| `resource_type` | What was affected | `"certificate"`, `"team"`, `"agent"`, `"audit"` |
|
||||
| `resource_id` | Specific resource | `"mc-demo-api"` |
|
||||
| `details` | Arbitrary JSON context | `{"environment": "staging", "issuer": "iss-local"}` |
|
||||
| `details` | Arbitrary JSON context | `{"environment": "staging", "issuer": "iss-local", "body_hash": "abc123..." }` |
|
||||
| `timestamp` | When it happened (server clock) | `"2026-03-14T10:30:00Z"` |
|
||||
|
||||
**Why immutable audit:** Compliance frameworks (SOC 2 Type II, PCI-DSS, ISO 27001) require tamper-evident audit logs. By making the repository interface append-only, even a compromised API server can't retroactively delete or modify audit records. In a production deployment, you'd also stream these to an external SIEM (Splunk, Datadog) for additional protection.
|
||||
The audit middleware (M19) records every HTTP request: method, path, status code, actor, request body SHA-256 hash, and latency. This creates a complete API audit trail without blocking responses (logging happens asynchronously).
|
||||
|
||||
**Check the dashboard.** The "Audit" view shows the full timeline of all actions across the system.
|
||||
**Why immutable audit:** Compliance frameworks (SOC 2 Type II, PCI-DSS, ISO 27001) require tamper-evident audit logs. By making the repository interface append-only and recording API calls, even a compromised API server can't retroactively delete or modify audit records. In a production deployment, you'd also stream these to an external SIEM (Splunk, Datadog) for additional protection.
|
||||
|
||||
**Check the dashboard.** The "Audit" view shows the full timeline of all actions across the system with filtering and CSV/JSON export.
|
||||
|
||||
---
|
||||
|
||||
@@ -388,7 +614,7 @@ Certctl sends notifications for certificate lifecycle events. Check what notific
|
||||
curl -s $API/api/v1/notifications | jq '.data[0:5]'
|
||||
```
|
||||
|
||||
**How it works:** The `NotificationService` generates notification records in the `notification_events` table whenever significant events occur — expiration warnings at configurable thresholds (30, 14, 7, 0 days by default), renewal success/failure, deployment results, and policy violations. Each notification has a `channel` (Email, Webhook) and a `recipient`.
|
||||
**How it works:** The `NotificationService` generates notification records in the `notification_events` table whenever significant events occur — expiration warnings at configurable thresholds (30, 14, 7, 0 days by default), renewal success/failure, deployment results, and policy violations. Each notification has a `channel` (Email, Webhook, Slack, Teams, PagerDuty, OpsGenie) and a `recipient`.
|
||||
|
||||
**Threshold-Based Alerting:** Each renewal policy defines configurable alert thresholds via the `alert_thresholds_days` field (e.g., `[30, 14, 7, 0]` for the standard policy, `[14, 7, 3, 0]` for the urgent policy). The scheduler checks which thresholds each certificate has crossed and sends one notification per threshold, deduplicated so the same alert is never sent twice. Certificates are automatically transitioned to `Expiring` status when entering the alert window and `Expired` when they hit 0 days.
|
||||
|
||||
@@ -409,6 +635,36 @@ flowchart TD
|
||||
|
||||
**Why graceful notifier fallback:** In demo mode, no SMTP server or webhook endpoint is configured. Rather than spamming error logs with "notifier not found" every 60 seconds (which was the original behavior — we fixed this), the service marks notifications as "sent" when no notifier is registered for the channel. This keeps the notification records visible in the dashboard without requiring external infrastructure.
|
||||
|
||||
### Configuring Notifier Connectors
|
||||
|
||||
In production, enable notifiers by setting environment variables:
|
||||
|
||||
**Slack:**
|
||||
```bash
|
||||
export CERTCTL_SLACK_WEBHOOK_URL="https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
|
||||
export CERTCTL_SLACK_CHANNEL="cert-alerts" # Optional, overrides channel in webhook
|
||||
export CERTCTL_SLACK_USERNAME="CertCTL" # Optional, defaults to "CertCTL"
|
||||
```
|
||||
|
||||
**Microsoft Teams:**
|
||||
```bash
|
||||
export CERTCTL_TEAMS_WEBHOOK_URL="https://outlook.webhook.office.com/webhookb2/..."
|
||||
```
|
||||
|
||||
**PagerDuty:**
|
||||
```bash
|
||||
export CERTCTL_PAGERDUTY_ROUTING_KEY="your-routing-key"
|
||||
export CERTCTL_PAGERDUTY_SEVERITY="warning" # Or: critical, error, info
|
||||
```
|
||||
|
||||
**OpsGenie:**
|
||||
```bash
|
||||
export CERTCTL_OPSGENIE_API_KEY="your-api-key"
|
||||
export CERTCTL_OPSGENIE_PRIORITY="P3" # Or: P1, P2, P4, P5
|
||||
```
|
||||
|
||||
When certificates expire, renewals fail, or policies are violated, certctl sends notifications via the configured channels. Each notifier connector implements the `Notifier` interface: `Send(ctx context.Context, recipient, subject, body string) error`. The notification processor handles retries and failure recording.
|
||||
|
||||
---
|
||||
|
||||
## Part 8: Create a Second Certificate and Compare
|
||||
@@ -448,6 +704,50 @@ curl -s -X POST $API/api/v1/certificates \
|
||||
|
||||
---
|
||||
|
||||
## Part 8.5: Revoke a Certificate
|
||||
|
||||
Let's revoke the payments gateway certificate — simulating a key compromise scenario:
|
||||
|
||||
```bash
|
||||
curl -s -X POST $API/api/v1/certificates/mc-demo-payments/revoke \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"reason": "keyCompromise"}' | jq .
|
||||
```
|
||||
|
||||
**How it works:** The `RevokeCertificateWithActor` service method executes a 7-step process:
|
||||
|
||||
1. Validates the certificate is eligible (not already revoked, not archived)
|
||||
2. Retrieves the latest certificate version to get the serial number
|
||||
3. Updates the certificate status to "Revoked" with a timestamp and reason
|
||||
4. Records the revocation in the `certificate_revocations` table (idempotent via ON CONFLICT)
|
||||
5. Notifies the issuing CA (best-effort — revocation succeeds even if the CA is unreachable)
|
||||
6. Creates an audit trail entry
|
||||
7. Sends revocation notifications via configured channels
|
||||
|
||||
Check the CRL (Certificate Revocation List):
|
||||
|
||||
```bash
|
||||
# JSON-formatted CRL
|
||||
curl -s $API/api/v1/crl | jq .
|
||||
|
||||
# DER-encoded X.509 CRL for the local CA (binary — pipe to openssl for inspection)
|
||||
curl -s $API/api/v1/crl/iss-local -o /tmp/crl.der
|
||||
openssl crl -inform DER -in /tmp/crl.der -text -noout
|
||||
```
|
||||
|
||||
Check OCSP status:
|
||||
|
||||
```bash
|
||||
# Replace SERIAL with the actual serial number from the certificate version
|
||||
curl -s $API/api/v1/ocsp/iss-local/SERIAL | jq .
|
||||
```
|
||||
|
||||
**Why RFC 5280 reason codes:** The reason code isn't just metadata — it tells clients *why* the certificate was revoked. A `keyCompromise` revocation means the private key was exposed and the certificate should be distrusted immediately. A `superseded` revocation means a newer certificate replaced it — less urgent. CRLs and OCSP responses include the reason code so client software can make informed trust decisions.
|
||||
|
||||
**Check the dashboard.** Click the payments certificate — you'll see a revocation banner with the reason code and timestamp.
|
||||
|
||||
---
|
||||
|
||||
## Part 9: Policy Violations
|
||||
|
||||
Let's see what happens when a certificate doesn't meet policy requirements. Check existing policy rules:
|
||||
@@ -479,6 +779,358 @@ curl -s "$API/api/v1/policies/pr-max-certificate-lifetime/violations" | jq .
|
||||
|
||||
---
|
||||
|
||||
## Part 9.5: Dashboard Stats and Metrics
|
||||
|
||||
certctl exposes operational metrics so you can monitor the health of your certificate infrastructure:
|
||||
|
||||
```bash
|
||||
# Dashboard summary — total certs, expiring, expired, active
|
||||
curl -s $API/api/v1/stats/summary | jq .
|
||||
|
||||
# Certificates grouped by status
|
||||
curl -s $API/api/v1/stats/certificates-by-status | jq .
|
||||
|
||||
# Expiration timeline — how many certs expire in the next 90 days
|
||||
curl -s "$API/api/v1/stats/expiration-timeline?days=90" | jq .
|
||||
|
||||
# Job trends — completed vs failed jobs over 30 days
|
||||
curl -s "$API/api/v1/stats/job-trends?days=30" | jq .
|
||||
|
||||
# Issuance rate — new certificates per day over 30 days
|
||||
curl -s "$API/api/v1/stats/issuance-rate?days=30" | jq .
|
||||
|
||||
# System metrics — gauges, counters, uptime (JSON)
|
||||
curl -s $API/api/v1/metrics | jq .
|
||||
|
||||
# System metrics — Prometheus exposition format (for Prometheus/Grafana/Datadog scraping)
|
||||
curl -s $API/api/v1/metrics/prometheus
|
||||
```
|
||||
|
||||
**How it works:** The `StatsService` computes aggregations in Go from existing repository List methods — no additional SQL queries or materialized views. This keeps the database schema simple while providing real-time dashboard data. The JSON metrics endpoint returns gauges (cert totals by status, agent counts, pending jobs), counters (completed/failed jobs), and server uptime. The Prometheus endpoint (`/api/v1/metrics/prometheus`) exposes the same data in Prometheus exposition format (`text/plain; version=0.0.4`) with `certctl_`-prefixed metric names — ready for scraping by Prometheus, Grafana Agent, Datadog Agent, or VictoriaMetrics.
|
||||
|
||||
**In the dashboard**, these stats power four interactive charts: an expiration heatmap, renewal success rate trends, certificate status distribution, and issuance rate. The agent fleet overview page uses agent metadata to group by OS, architecture, and version.
|
||||
|
||||
---
|
||||
|
||||
## Part 10: Certificate Profiles
|
||||
|
||||
Profiles define the cryptographic constraints for a class of certificates. Let's explore the demo profiles:
|
||||
|
||||
```bash
|
||||
# List all profiles
|
||||
curl -s $API/api/v1/profiles | jq '.data[] | {id, name, allowed_key_algorithms, max_validity_days}'
|
||||
```
|
||||
|
||||
Create a new profile for high-security certificates:
|
||||
|
||||
```bash
|
||||
curl -s -X POST $API/api/v1/profiles \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"id": "prof-demo-hsec",
|
||||
"name": "Demo High Security",
|
||||
"description": "ECDSA-only with 90-day max TTL",
|
||||
"allowed_key_algorithms": [{"algorithm": "ECDSA", "min_size": 256}],
|
||||
"max_validity_days": 90,
|
||||
"allowed_ekus": ["serverAuth"],
|
||||
"enabled": true
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
**How it works:** Certificate profiles are stored in the `certificate_profiles` table with an `allowed_key_algorithms` JSONB column that defines which key types and minimum sizes are acceptable. When a certificate is assigned to a profile, the profile constraints are enforced during CSR validation. The `max_validity_days` field controls the maximum certificate lifetime — profiles with values translating to under 1 hour enable short-lived certificate mode, where certs are exempt from CRL/OCSP.
|
||||
|
||||
**Why profiles matter:** Without profiles, any agent can submit a CSR with any key type and any validity period. Profiles create crypto policy guardrails — "production TLS certs must use ECDSA P-256 with 90-day max TTL" — that prevent configuration drift and enforce compliance requirements across the fleet.
|
||||
|
||||
**In the dashboard**, click "Profiles" in the sidebar to see and manage certificate profiles.
|
||||
|
||||
---
|
||||
|
||||
## Part 11: Agent Groups
|
||||
|
||||
Agent groups let you organize your agent fleet by criteria for dynamic policy scoping:
|
||||
|
||||
```bash
|
||||
# List existing agent groups
|
||||
curl -s $API/api/v1/agent-groups | jq '.data[] | {id, name, match_os, match_architecture}'
|
||||
```
|
||||
|
||||
Create a group that matches all Linux agents:
|
||||
|
||||
```bash
|
||||
curl -s -X POST $API/api/v1/agent-groups \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"id": "ag-demo-linux",
|
||||
"name": "Demo Linux Agents",
|
||||
"description": "All agents running Linux",
|
||||
"match_os": "linux",
|
||||
"enabled": true
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
**How it works:** Agent groups use dynamic matching criteria — `match_os`, `match_architecture`, `match_ip_cidr`, and `match_version` — that are compared against agent metadata reported via heartbeat. Agents automatically join groups when their metadata matches the criteria. Manual membership (explicit include/exclude) is also supported for edge cases. Renewal policies can be scoped to agent groups via the `agent_group_id` foreign key, so you can say "this renewal policy applies only to Linux agents."
|
||||
|
||||
**In the dashboard**, click "Agent Groups" to see groups with visual match criteria badges. The "Fleet Overview" page shows OS/architecture distribution charts powered by agent metadata.
|
||||
|
||||
---
|
||||
|
||||
## Part 12: Interactive Approval Workflow
|
||||
|
||||
For high-value certificates, you may want human oversight before renewal proceeds. The demo includes 2 pre-seeded `AwaitingApproval` renewal jobs (for `auth-production` and `payments-production`). Open **Jobs** in the sidebar — you'll see the amber "Pending Approval" banner and Approve/Reject buttons immediately.
|
||||
|
||||
```bash
|
||||
# Check jobs that need approval (demo includes 2)
|
||||
curl -s "$API/api/v1/jobs?status=AwaitingApproval" | jq '.data[] | {id, type, certificate_id, status}'
|
||||
```
|
||||
|
||||
Approve or reject them:
|
||||
|
||||
```bash
|
||||
# Approve a job
|
||||
curl -s -X POST $API/api/v1/jobs/JOB_ID/approve \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"reason": "Verified key type meets compliance requirements"}' | jq .
|
||||
|
||||
# Reject a job
|
||||
curl -s -X POST $API/api/v1/jobs/JOB_ID/reject \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"reason": "Key type does not meet PCI requirements"}' | jq .
|
||||
```
|
||||
|
||||
**How it works:** When a renewal policy has `auto_renew` set to false, renewal jobs enter the `AwaitingApproval` state instead of being processed immediately. An operator must explicitly approve or reject the job via the API or the GUI. Approved jobs transition to `Pending` and are picked up by the job processor. Rejected jobs move to `Cancelled` with the provided reason recorded in the audit trail.
|
||||
|
||||
**Why interactive approval:** Not every certificate renewal should be automatic. PCI-scoped certificates, certs with specific compliance requirements, or certificates being migrated between issuers benefit from a human checkpoint. The AwaitingApproval state creates that checkpoint without blocking the entire job pipeline.
|
||||
|
||||
**In the dashboard:** Click "Jobs" in the sidebar, filter by status "AwaitingApproval", and you'll see a list of renewal jobs waiting for approval. Each job shows the certificate, issuer, and requested validity period. Click a job to open its detail view and see the Approve / Reject buttons with a reason text field. After approval or rejection, the job status updates in real time and the audit trail records the decision.
|
||||
|
||||
---
|
||||
|
||||
## Part 13: Advanced Query Features
|
||||
|
||||
certctl's API supports sorting, filtering, cursor pagination, and sparse field selection:
|
||||
|
||||
```bash
|
||||
# Sort by expiration date (ascending)
|
||||
curl -s "$API/api/v1/certificates?sort=notAfter" | jq '.data[] | {id, common_name, expires_at}'
|
||||
|
||||
# Sort descending (prefix with -)
|
||||
curl -s "$API/api/v1/certificates?sort=-createdAt" | jq '.data[0:3]'
|
||||
|
||||
# Time-range filter: certs expiring before May 2026
|
||||
curl -s "$API/api/v1/certificates?expires_before=2026-05-01T00:00:00Z" | jq '.data | length'
|
||||
|
||||
# Sparse fields: only return id, status, and expiry
|
||||
curl -s "$API/api/v1/certificates?fields=id,status,expires_at" | jq '.data[0]'
|
||||
|
||||
# Cursor pagination: page through results efficiently
|
||||
curl -s "$API/api/v1/certificates?page_size=3" | jq '{next_cursor: .next_cursor, count: (.data | length)}'
|
||||
|
||||
# View deployment targets for a certificate
|
||||
curl -s "$API/api/v1/certificates/mc-demo-api/deployments" | jq .
|
||||
```
|
||||
|
||||
**How it works:** Sort uses a whitelist of allowed fields (notAfter, createdAt, updatedAt, commonName, name, status, environment) mapped to SQL columns. Cursor pagination uses keyset pagination (`(created_at, id) < (cursor_time, cursor_id)`) which is more efficient than OFFSET-based pagination for large datasets. Sparse fields marshal the full object to JSON, then strip unrequested keys — lightweight but effective. Time-range filters add WHERE clauses to the SQL query.
|
||||
|
||||
**Why cursor pagination:** Page-based pagination (`?page=50&per_page=100`) requires the database to skip rows, which gets slower as page numbers increase. Cursor-based pagination (`?cursor=<token>&page_size=100`) uses an indexed seek, maintaining constant performance regardless of how deep you paginate. For large certificate inventories (thousands of certs), this is the difference between sub-millisecond and multi-second queries.
|
||||
|
||||
---
|
||||
|
||||
## Part 14: CLI Tool (M16b)
|
||||
|
||||
certctl includes a standalone CLI tool for command-line users:
|
||||
|
||||
```bash
|
||||
# Build the CLI
|
||||
cd cmd/cli && go build -o certctl-cli .
|
||||
|
||||
# Export credentials
|
||||
export CERTCTL_SERVER_URL="http://localhost:8443"
|
||||
export CERTCTL_API_KEY="test-key-123"
|
||||
|
||||
# List certificates (JSON or table format)
|
||||
./certctl-cli certs list
|
||||
|
||||
# Get certificate details
|
||||
./certctl-cli certs get mc-demo-api
|
||||
|
||||
# Trigger renewal
|
||||
./certctl-cli certs renew mc-demo-api
|
||||
|
||||
# Revoke a certificate with RFC 5280 reason
|
||||
./certctl-cli certs revoke mc-demo-payments --reason keyCompromise
|
||||
|
||||
# List agents
|
||||
./certctl-cli agents list
|
||||
|
||||
# List pending jobs
|
||||
./certctl-cli jobs list
|
||||
|
||||
# Check system health and stats
|
||||
./certctl-cli status
|
||||
|
||||
# JSON output format
|
||||
./certctl-cli --format json status
|
||||
|
||||
# Bulk import certificates from a PEM file
|
||||
./certctl-cli import /path/to/certificates.pem
|
||||
```
|
||||
|
||||
**How it works:** The CLI tool is a self-contained Go binary with zero external dependencies (just the stdlib: flag, net/http, encoding/json, text/tabwriter). It reads credentials from environment variables or command-line flags, calls the REST API endpoints, and formats output as JSON or ASCII tables. This makes it perfect for scripts, CI/CD pipelines, and automation workflows.
|
||||
|
||||
---
|
||||
|
||||
## Part 15: MCP Server for AI Integration (M18a)
|
||||
|
||||
certctl exposes 78 MCP tools covering the REST API via the Model Context Protocol (MCP), enabling seamless integration with Claude, Cursor, and other AI assistants:
|
||||
|
||||
```bash
|
||||
# Build the MCP server
|
||||
cd cmd/mcp-server && go build -o mcp-server .
|
||||
|
||||
# Export credentials
|
||||
export CERTCTL_SERVER_URL="http://localhost:8443"
|
||||
export CERTCTL_API_KEY="test-key-123"
|
||||
|
||||
# Start the MCP server (listens on stdin/stdout)
|
||||
./mcp-server
|
||||
```
|
||||
|
||||
**How it works:** The MCP server uses the official Model Context Protocol Go SDK to expose 78 stateless HTTP proxy tools covering the REST API. Each MCP tool corresponds to one or more REST endpoints and includes:
|
||||
|
||||
- **Input schema** — typed arguments with JSON schema hints for LLM-friendly introspection
|
||||
- **Binary support** — handles DER-encoded CRL and OCSP responses without mangling
|
||||
- **Error translation** — converts HTTP errors to user-readable messages
|
||||
|
||||
**Example usage from Claude:**
|
||||
|
||||
```
|
||||
User: What certificates are expiring in the next 30 days?
|
||||
|
||||
Claude uses the MCP tools to:
|
||||
1. Call tools.listCertificates with filters: {status: "Expiring"}
|
||||
2. Parse the response
|
||||
3. Display: "mc-api-prod expires in 12 days. mc-cdn-prod expires in 8 days..."
|
||||
|
||||
User: Revoke mc-payments due to key compromise
|
||||
|
||||
Claude uses the MCP tools to:
|
||||
1. Call tools.revokeCertificate with id="mc-payments" reason="keyCompromise"
|
||||
2. Return the audit trail entry showing revocation recorded
|
||||
```
|
||||
|
||||
The MCP server is perfect for:
|
||||
- Compliance audits — "Show me all certificates with PCI tags and their revocation status"
|
||||
- Incident response — "Revoke all certificates from the OpenSSL CA that were issued before 2026-01-01"
|
||||
- Operational queries — "What's the renewal success rate over the last 30 days?"
|
||||
|
||||
---
|
||||
|
||||
## Part 16: Certificate Discovery (M18b + M21)
|
||||
|
||||
certctl discovers existing certificates two ways: **filesystem scanning** (agents scan local directories) and **network scanning** (the server probes TLS endpoints). Both feed into the same triage pipeline.
|
||||
|
||||
**The demo comes pre-loaded with discovery data:** 9 discovered certificates (3 Unmanaged from filesystem scans, 3 Unmanaged from network scans, 2 Managed, 1 Dismissed), 3 discovery scans, and 3 network scan targets with recent scan results. Open **Discovery** in the sidebar to see the triage workflow immediately. The steps below show how to configure discovery from scratch.
|
||||
|
||||
### Filesystem Discovery (Agent-Side)
|
||||
|
||||
Configure the demo agent to scan for certificates. In the Docker Compose setup, agents have a `/tmp/certs` directory (created by the seed script). Restart the agent with discovery enabled:
|
||||
|
||||
```bash
|
||||
# Stop the existing agent
|
||||
docker compose -f deploy/docker-compose.yml stop agent
|
||||
|
||||
# Restart with discovery enabled (scans /tmp/certs on startup and then every 6 hours)
|
||||
docker compose -f deploy/docker-compose.yml run -e CERTCTL_DISCOVERY_DIRS=/tmp/certs agent certctl-agent
|
||||
```
|
||||
|
||||
Or with the CLI flag:
|
||||
|
||||
```bash
|
||||
certctl-agent --agent-id a-demo-1 --key-dir /tmp/keys --discovery-dirs /tmp/certs --server http://localhost:8443 --api-key test-key-123
|
||||
```
|
||||
|
||||
### Network Discovery (Server-Side)
|
||||
|
||||
The server can also discover certificates by actively probing TLS endpoints — no agent required. Network scanning is enabled by default in the Docker Compose demo (`CERTCTL_NETWORK_SCAN_ENABLED=true`), with 3 pre-configured scan targets. You can create additional targets:
|
||||
|
||||
```bash
|
||||
# Create a network scan target
|
||||
curl -s -X POST $API/api/v1/network-scan-targets \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"name": "Demo Local Scan",
|
||||
"cidrs": ["127.0.0.1/32"],
|
||||
"ports": [8443],
|
||||
"enabled": true,
|
||||
"scan_interval_hours": 6,
|
||||
"timeout_ms": 5000
|
||||
}' | jq .
|
||||
|
||||
# Trigger an immediate scan (otherwise runs every 6 hours)
|
||||
NST_ID=$(curl -s $API/api/v1/network-scan-targets | jq -r '.data[0].id')
|
||||
curl -s -X POST "$API/api/v1/network-scan-targets/$NST_ID/scan" | jq .
|
||||
|
||||
# List scan targets and their results
|
||||
curl -s $API/api/v1/network-scan-targets | jq .
|
||||
```
|
||||
|
||||
Network-discovered certificates appear in the same discovery pipeline as filesystem-discovered ones, with `agent_id=server-scanner` and `source_format=network`.
|
||||
|
||||
### Triage Discovered Certificates
|
||||
|
||||
Both discovery sources feed into the same triage workflow. Check what was found:
|
||||
|
||||
```bash
|
||||
# List discovered certificates (should show unmanaged certs found by agents and network scans)
|
||||
curl -s "$API/api/v1/discovered-certificates?status=Unmanaged" | jq '.data[] | {id, common_name, expires_at, issuer_dn, status}'
|
||||
|
||||
# Get a summary of all discoveries
|
||||
curl -s $API/api/v1/discovery-summary | jq .
|
||||
```
|
||||
|
||||
If certificates were found, you'll see entries with `status: "Unmanaged"`. Triage them — claim the ones you want to manage or dismiss the ones you don't:
|
||||
|
||||
```bash
|
||||
# Claim a certificate (link it to a managed cert, or create new enrollment)
|
||||
DISCOVERED_ID=$(curl -s "$API/api/v1/discovered-certificates?status=Unmanaged" | jq -r '.data[0].id')
|
||||
curl -s -X POST "$API/api/v1/discovered-certificates/$DISCOVERED_ID/claim" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"reason": "Migrating from external CA to certctl"}' | jq .
|
||||
|
||||
# Or dismiss a certificate
|
||||
curl -s -X POST "$API/api/v1/discovered-certificates/$DISCOVERED_ID/dismiss" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"reason": "Self-signed test cert, not production"}' | jq .
|
||||
```
|
||||
|
||||
**How it works:** Filesystem discovery: the agent scans `CERTCTL_DISCOVERY_DIRS` on startup and every 6 hours, extracts metadata (common name, SANs, issuer, expiration, key type, fingerprint) from all PEM and DER files, and POSTs findings to `POST /api/v1/agents/{id}/discoveries`. Network discovery: the server expands CIDR ranges (capped at /20 = 4096 IPs), connects to each IP:port via TLS, extracts the peer certificate chain, and stores results using `server-scanner` as a sentinel agent ID. Both sources deduplicate by fingerprint and store results with a status: **Unmanaged** (discovered, not yet managed), **Managed** (linked to a control plane cert), or **Dismissed** (operator decided not to manage). This gives you a triage workflow: discover → review → claim or dismiss.
|
||||
|
||||
### Discovery & Network Scans in the Dashboard
|
||||
|
||||
**Discovered Certificates Page:** Click "Discovery" in the sidebar to see a triage workflow. The page lists all discovered certificates grouped by status (Unmanaged, Managed, Dismissed). For each Unmanaged certificate, you see:
|
||||
- Common name and SANs
|
||||
- Issuer and subject DN
|
||||
- Expiration date
|
||||
- Fingerprint (helps dedup)
|
||||
- Source (agent ID or `server-scanner` for network scans)
|
||||
- Action buttons: Claim (manage this cert), Dismiss (ignore it)
|
||||
|
||||
Click "Claim" to bring an unmanaged certificate under certctl's control. Click "Dismiss" to remove it from the triage queue.
|
||||
|
||||
**Network Scans Page:** Click "Network Scans" in the sidebar to manage network scan targets. The page shows all configured scan targets with:
|
||||
- Target name and description
|
||||
- CIDR ranges and ports scanned
|
||||
- Enabled/disabled toggle
|
||||
- Scan interval and connection timeout
|
||||
- Last scan timestamp and result summary
|
||||
- Action buttons: Edit, Delete, Scan Now (immediate)
|
||||
|
||||
Click "Scan Now" to trigger an immediate TLS probe of the target's IP ranges. Results appear within seconds in the Discovered Certificates page as entries with `agent_id=server-scanner`.
|
||||
|
||||
**In the dashboard**, click "Discovery" in the sidebar to see what agents and network scans found — claim unmanaged certs to bring them under certctl's management, or dismiss them.
|
||||
|
||||
---
|
||||
|
||||
## End-to-End Architecture Summary
|
||||
|
||||
Here's what we just walked through, mapped to the system architecture:
|
||||
@@ -490,19 +1142,23 @@ flowchart TB
|
||||
U2 --> U3["POST /certificates"]
|
||||
U3 --> U4["POST /certificates/{id}/renew"]
|
||||
U4 --> U5["POST /certificates/{id}/deploy"]
|
||||
U5 --> U6["GET /audit"]
|
||||
U5 --> U5b["POST /certificates/{id}/revoke"]
|
||||
U5b --> U6["GET /stats + /metrics"]
|
||||
U6 --> U7["POST /profiles"]
|
||||
U7 --> U8["POST /agent-groups"]
|
||||
U8 --> U9["GET /audit"]
|
||||
end
|
||||
|
||||
subgraph "Control Plane (certctl-server)"
|
||||
API["REST API\nGo net/http"]
|
||||
SVC["Service Layer\nBusiness Logic"]
|
||||
REPO["Repository Layer\ndatabase/sql + lib/pq"]
|
||||
SCHED["Scheduler\n4 background loops"]
|
||||
SCHED["Scheduler\n6 background loops"]
|
||||
CONN["Connector Registry\nIssuer + Target + Notifier"]
|
||||
end
|
||||
|
||||
subgraph "Data Store"
|
||||
PG["PostgreSQL 16\n14 tables, TEXT PKs"]
|
||||
PG["PostgreSQL 16\n21 tables, TEXT PKs"]
|
||||
end
|
||||
|
||||
subgraph "Agent (certctl-agent)"
|
||||
@@ -616,7 +1272,20 @@ echo -e "${YELLOW}Step 9: Recent audit events...${NC}"
|
||||
curl -s $API/api/v1/audit | jq '.data[0:3] | .[] | {action, resource_type, resource_id, timestamp}'
|
||||
echo ""
|
||||
|
||||
# Step 10: Summary
|
||||
# Step 10: Revoke the certificate
|
||||
echo -e "${YELLOW}Step 10: Revoking certificate...${NC}"
|
||||
curl -s -X POST $API/api/v1/certificates/$CERT_ID/revoke \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"reason": "superseded"}' | jq .
|
||||
echo -e "${GREEN}Certificate revoked${NC}"
|
||||
echo ""
|
||||
|
||||
# Step 11: Check stats
|
||||
echo -e "${YELLOW}Step 11: Dashboard summary...${NC}"
|
||||
curl -s $API/api/v1/stats/summary | jq .
|
||||
echo ""
|
||||
|
||||
# Step 12: Summary
|
||||
echo -e "${BLUE}=== Demo Complete ===${NC}"
|
||||
echo ""
|
||||
echo "What happened:"
|
||||
@@ -624,7 +1293,9 @@ echo " 1. Created a team and owner for accountability"
|
||||
echo " 2. Created a managed certificate tracked by certctl"
|
||||
echo " 3. Triggered renewal (would contact the Local CA in production flow)"
|
||||
echo " 4. Triggered deployment (would push to NGINX/F5/IIS targets)"
|
||||
echo " 5. All actions recorded in the audit trail"
|
||||
echo " 5. Revoked the certificate with RFC 5280 reason codes"
|
||||
echo " 6. Checked dashboard stats and metrics"
|
||||
echo " 7. All actions recorded in the audit trail"
|
||||
echo ""
|
||||
echo -e "Open ${GREEN}http://localhost:8443${NC} to see everything in the dashboard."
|
||||
echo "Look for certificate: $CERT_ID"
|
||||
@@ -646,10 +1317,12 @@ If you're using this demo to present certctl to decision-makers, here's the narr
|
||||
1. **Start with the dashboard** — "This is your certificate inventory. Every TLS certificate across your infrastructure, in one place."
|
||||
2. **Point to expiring certs** — "These certificates would have caused outages. Certctl catches them automatically."
|
||||
3. **Show the cert you just created** — "I just created this via the API. It's already tracked, assigned to a team, and will be renewed automatically."
|
||||
4. **Show the audit trail** — "Complete traceability. Every action, every change, every deployment — timestamped and attributed."
|
||||
5. **Show policies** — "Guardrails. We enforce that every certificate has an owner, uses approved CAs, and stays within allowed environments."
|
||||
6. **Show agents** — "Private keys never touch the control plane. Agents handle cryptographic operations locally on your infrastructure."
|
||||
7. **Show the API** — "Everything is API-first. The dashboard is just one consumer. You can integrate with CI/CD, Terraform, or custom tooling."
|
||||
4. **Show revocation** — "If a key is compromised, one-click revocation with RFC 5280 reason codes. CRL and OCSP endpoints are served automatically."
|
||||
5. **Show the audit trail** — "Complete traceability. Every action, every change, every deployment — timestamped and attributed."
|
||||
6. **Show policies** — "Guardrails. We enforce that every certificate has an owner, uses approved CAs, and stays within allowed environments."
|
||||
7. **Show agents** — "Private keys never touch the control plane. Agents handle cryptographic operations locally on your infrastructure."
|
||||
8. **Show dashboard stats** — "Real-time metrics: expiration trends, job success rates, certificate distribution. Everything you need to operate with confidence."
|
||||
9. **Show the CLI and MCP server** — "Terminal users get a CLI tool. AI assistants get MCP integration. Everything is API-first."
|
||||
|
||||
## Teardown
|
||||
|
||||
|
||||
@@ -1,126 +0,0 @@
|
||||
# certctl Demo Guide
|
||||
|
||||
A 5-7 minute guided walkthrough of certctl's dashboard and API. Perfect for stakeholder presentations and team demos.
|
||||
|
||||
New to certificates? Read the [Concepts Guide](concepts.md) first. Want a hands-on demo where you issue certificates yourself? See the [Advanced Demo](demo-advanced.md).
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
git clone https://github.com/shankar0123/certctl.git
|
||||
cd certctl
|
||||
docker compose -f deploy/docker-compose.yml up -d --build
|
||||
```
|
||||
|
||||
Wait ~30 seconds for PostgreSQL to initialize and the server to start, then open:
|
||||
|
||||
**http://localhost:8443**
|
||||
|
||||
You'll see the dashboard pre-loaded with 14 demo certificates across multiple teams, environments, and statuses — including expiring, expired, active, failed, and in-progress renewals.
|
||||
|
||||
## What You'll See
|
||||
|
||||
### Dashboard Overview
|
||||
The main dashboard shows at a glance:
|
||||
- **Total certificates** managed across your infrastructure
|
||||
- **Expiring soon** — certificates within 30 days of expiration (yellow/red)
|
||||
- **Expired** — certificates past their expiration date
|
||||
- **Active** — healthy certificates with time remaining
|
||||
- **Renewal success rate** — percentage of automated renewals that succeeded
|
||||
|
||||
Below the stats, you'll see an **expiry timeline** showing how many certs expire in each time bucket (7/14/30/60/90 days), and a **recent activity feed** with the latest audit events.
|
||||
|
||||
### Certificates View
|
||||
Click "Certificates" in the sidebar to see the full inventory:
|
||||
- Search by name or domain
|
||||
- Filter by status (Active, Expiring, Expired, Failed) or environment (Production, Staging)
|
||||
- Sort by any column
|
||||
- Click any row to see full details: metadata, version history, deployment targets, and audit trail
|
||||
|
||||
### Demo Scenarios to Walk Through
|
||||
|
||||
**1. "We're about to have an outage"**
|
||||
Filter by status → Expiring. You'll see `auth-production` (12 days), `cdn-production` (8 days), and `mail-production` (5 days). These are exactly the kind of impending expirations the platform catches automatically.
|
||||
|
||||
**2. "A renewal failed"**
|
||||
Look at `vpn-production` — status: Failed. Click it to see the audit trail showing the ACME challenge failure after 3 retry attempts. The system sent a webhook notification to the ops channel.
|
||||
|
||||
**3. "Who owns this cert?"**
|
||||
Click any certificate to see the owner, team, environment, and tags. Every cert has clear accountability.
|
||||
|
||||
**4. "What happened to the legacy app?"**
|
||||
Filter by status → Expired. `legacy-app` expired 3 days ago, `old-api-v1` expired 15 days ago. Both have policy violations flagged.
|
||||
|
||||
**5. "Show me the agent fleet"**
|
||||
Click "Agents" in the sidebar. Four agents are online; one (`iis-prod-agent`) went offline 3 hours ago, which you'd want to investigate.
|
||||
|
||||
**6. "What policies are enforced?"**
|
||||
Click "Policies" to see the active rules: required owner metadata, allowed environments, max certificate lifetime, minimum renewal window. Check the violations list to see which certs are non-compliant.
|
||||
|
||||
## API Walkthrough
|
||||
|
||||
The dashboard is backed by a real REST API. Try these while the demo is running:
|
||||
|
||||
```bash
|
||||
# List all certificates
|
||||
curl -s http://localhost:8443/api/v1/certificates | jq .
|
||||
|
||||
# Get expiring certs
|
||||
curl -s "http://localhost:8443/api/v1/certificates?status=expiring" | jq .
|
||||
|
||||
# Get a specific certificate
|
||||
curl -s http://localhost:8443/api/v1/certificates/mc-api-prod | jq .
|
||||
|
||||
# List agents
|
||||
curl -s http://localhost:8443/api/v1/agents | jq .
|
||||
|
||||
# View audit trail
|
||||
curl -s http://localhost:8443/api/v1/audit | jq .
|
||||
|
||||
# View policy violations (replace pr-require-owner with the ID of the policy you want to inspect)
|
||||
curl -s http://localhost:8443/api/v1/policies/pr-require-owner/violations | jq .
|
||||
|
||||
# Check system health
|
||||
curl -s http://localhost:8443/health | jq .
|
||||
```
|
||||
|
||||
## Demo Without Docker
|
||||
|
||||
The dashboard includes a **Demo Mode** that works without any backend. Build and serve the frontend with Vite:
|
||||
|
||||
```bash
|
||||
cd web
|
||||
npm install
|
||||
npm run dev
|
||||
# Dashboard available at http://localhost:5173
|
||||
```
|
||||
|
||||
When the API is unreachable, the dashboard automatically loads realistic mock data and shows a subtle "Demo Mode" badge. This is perfect for screenshots, presentations, or quick demos without any infrastructure.
|
||||
|
||||
## Teardown
|
||||
|
||||
```bash
|
||||
docker compose -f deploy/docker-compose.yml down -v
|
||||
```
|
||||
|
||||
The `-v` flag removes the PostgreSQL data volume so you get a clean slate next time.
|
||||
|
||||
## Presenting to Stakeholders
|
||||
|
||||
If you're demoing to a team or customer, here's a suggested flow:
|
||||
|
||||
1. **Start with the dashboard** — "This is your certificate inventory at a glance"
|
||||
2. **Show the expiring certs** — "These three would have caused outages without this platform"
|
||||
3. **Click into auth-production** — "Here's the full lifecycle: who owns it, where it's deployed, when it was last renewed"
|
||||
4. **Show the failed VPN cert** — "The system tried 3 times, then alerted the team via webhook"
|
||||
5. **Show agents** — "Agents run on your infrastructure, handle key generation locally, and report back"
|
||||
6. **Show policies** — "Guardrails prevent teams from going outside approved scope"
|
||||
7. **Show the API** — "Everything you see here is API-first, so you can automate on top of it"
|
||||
|
||||
The whole walkthrough takes 5-7 minutes.
|
||||
|
||||
## Next Steps
|
||||
|
||||
- **[Advanced Demo](demo-advanced.md)** — Go hands-on: create a team, issue a certificate via API, trigger renewal, and watch it appear in the dashboard
|
||||
- **[Concepts Guide](concepts.md)** — Understand TLS certificates, CAs, and private keys from scratch
|
||||
- **[Architecture](architecture.md)** — Deep dive into the control plane, agent model, and connector architecture
|
||||
@@ -0,0 +1,191 @@
|
||||
# MCP Server Guide
|
||||
|
||||
certctl ships with an MCP (Model Context Protocol) server that lets AI assistants manage your certificate infrastructure through natural language. Ask Claude to "show me all expiring certificates," "revoke the VPN cert," or "what agents are offline?" and the MCP server translates that into API calls against your certctl instance.
|
||||
|
||||
This guide covers setup, configuration, and usage with Claude, Cursor, and other MCP-compatible tools.
|
||||
|
||||
## What Is MCP?
|
||||
|
||||
MCP is an open protocol that connects AI assistants to external tools and data sources. Instead of copying and pasting API responses into a chat window, MCP lets the AI call your tools directly. The certctl MCP server exposes all 78 API endpoints as MCP tools — the AI sees typed schemas describing what each tool does, what parameters it accepts, and what it returns.
|
||||
|
||||
The MCP server is a separate binary (`cmd/mcp-server/`) that communicates via stdio transport. It's a stateless HTTP proxy: every MCP tool call becomes an HTTP request to the certctl REST API. No new state, no new database tables, no new attack surface beyond what the API already exposes.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
You need:
|
||||
|
||||
1. A running certctl server (see [Quick Start](quickstart.md))
|
||||
2. The MCP server binary — either built from source or from a Docker image
|
||||
3. An MCP-compatible AI client (Claude Desktop, Cursor, VS Code with Copilot, etc.)
|
||||
|
||||
## Building the MCP Server
|
||||
|
||||
```bash
|
||||
cd certctl
|
||||
go build -o certctl-mcp ./cmd/mcp-server/
|
||||
```
|
||||
|
||||
The binary has zero runtime dependencies beyond the certctl server it connects to.
|
||||
|
||||
## Configuration
|
||||
|
||||
The MCP server reads two environment variables:
|
||||
|
||||
| Variable | Required | Default | Description |
|
||||
|----------|----------|---------|-------------|
|
||||
| `CERTCTL_SERVER_URL` | No | `http://localhost:8443` | URL of the certctl REST API |
|
||||
| `CERTCTL_API_KEY` | No | (empty) | API key for authentication (passed as `Bearer` token) |
|
||||
|
||||
If your certctl server has auth enabled (the default), you must provide the API key. The MCP server passes it through to every HTTP request.
|
||||
|
||||
## Setting Up with Claude Desktop
|
||||
|
||||
Add this to your Claude Desktop MCP configuration file (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS, `%APPDATA%\Claude\claude_desktop_config.json` on Windows):
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"certctl": {
|
||||
"command": "/path/to/certctl-mcp",
|
||||
"env": {
|
||||
"CERTCTL_SERVER_URL": "http://localhost:8443",
|
||||
"CERTCTL_API_KEY": "your-api-key-here"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Restart Claude Desktop. You should see "certctl" appear in the MCP tools list with 78 available tools.
|
||||
|
||||
## Setting Up with Cursor
|
||||
|
||||
In Cursor, go to Settings → MCP Servers and add:
|
||||
|
||||
```json
|
||||
{
|
||||
"certctl": {
|
||||
"command": "/path/to/certctl-mcp",
|
||||
"env": {
|
||||
"CERTCTL_SERVER_URL": "http://localhost:8443",
|
||||
"CERTCTL_API_KEY": "your-api-key-here"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Setting Up with Claude Code
|
||||
|
||||
Add certctl as an MCP server in your project's `.mcp.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"certctl": {
|
||||
"command": "/path/to/certctl-mcp",
|
||||
"env": {
|
||||
"CERTCTL_SERVER_URL": "http://localhost:8443",
|
||||
"CERTCTL_API_KEY": "your-api-key-here"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Available Tools
|
||||
|
||||
The MCP server registers 78 tools organized across 16 resource domains:
|
||||
|
||||
| Domain | Tools | Examples |
|
||||
|--------|-------|---------|
|
||||
| Certificates | 9 | List, get, create, update, archive, versions, renew, deploy, revoke |
|
||||
| CRL & OCSP | 3 | Get JSON CRL, get DER CRL by issuer, check OCSP status |
|
||||
| Issuers | 6 | List, get, create, update, delete, test connection |
|
||||
| Targets | 5 | List, get, create, update, delete |
|
||||
| Agents | 8 | List, get, register, heartbeat, CSR submit, certificate pickup, get work, report job status |
|
||||
| Jobs | 5 | List, get, cancel, approve, reject |
|
||||
| Policies | 6 | List, get, create, update, delete, list violations |
|
||||
| Profiles | 5 | List, get, create, update, delete |
|
||||
| Teams | 5 | List, get, create, update, delete |
|
||||
| Owners | 5 | List, get, create, update, delete |
|
||||
| Agent Groups | 6 | List, get, create, update, delete, list members |
|
||||
| Audit | 2 | List events (with filters), get event by ID |
|
||||
| Notifications | 3 | List, get, mark as read |
|
||||
| Stats | 5 | Summary, certs by status, expiration timeline, job trends, issuance rate |
|
||||
| Metrics | 1 | System metrics (gauges, counters, uptime) |
|
||||
| Health | 4 | Health check, readiness probe, auth info, auth check |
|
||||
|
||||
Every tool has typed input parameters with `jsonschema` descriptions, so the AI knows exactly what arguments to provide and what each field means.
|
||||
|
||||
## Example Conversations
|
||||
|
||||
Once configured, you can interact with certctl through natural language:
|
||||
|
||||
**"Show me all certificates expiring in the next 14 days"**
|
||||
The AI calls `certctl_list_certificates` with `status=Expiring` and interprets the results.
|
||||
|
||||
**"Renew the API production certificate"**
|
||||
The AI calls `certctl_trigger_renewal` with `id=mc-api-prod`.
|
||||
|
||||
**"Who owns the payments gateway cert?"**
|
||||
The AI calls `certctl_get_certificate` with `id=mc-payments-prod` and reads the `owner_id` and `team_id` fields.
|
||||
|
||||
**"Are any agents offline?"**
|
||||
The AI calls `certctl_list_agents` and checks the heartbeat timestamps.
|
||||
|
||||
**"Revoke the old VPN cert — the key was compromised"**
|
||||
The AI calls `certctl_revoke_certificate` with `id=mc-vpn-old` and `reason=keyCompromise`.
|
||||
|
||||
**"Give me a summary of the certificate fleet"**
|
||||
The AI calls `certctl_dashboard_summary` for aggregate stats, then optionally `certctl_certificates_by_status` for the breakdown.
|
||||
|
||||
**"Create a new cert for staging.api.example.com owned by the platform team"**
|
||||
The AI calls `certctl_create_certificate` with the common name, team ID, and owner ID.
|
||||
|
||||
## Architecture
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
AI["AI Assistant\n(Claude, Cursor)"]
|
||||
MCP["certctl MCP\ncmd/mcp-server/"]
|
||||
SERVER["certctl Server\n:8443"]
|
||||
|
||||
AI <-->|"stdio"| MCP
|
||||
MCP -->|"HTTP + Bearer token"| SERVER
|
||||
|
||||
MCP ~~~ TOOLS["78 tools · 16 domains\nTyped input structs"]
|
||||
```
|
||||
|
||||
The MCP server is intentionally thin:
|
||||
|
||||
- **No state** — every request is a pass-through HTTP call. Restart it anytime.
|
||||
- **No new auth** — uses the same API key as the REST API.
|
||||
- **No new dependencies** — just the official MCP Go SDK (`modelcontextprotocol/go-sdk`).
|
||||
- **No new attack surface** — the AI can only do what the API key allows.
|
||||
|
||||
## Security Considerations
|
||||
|
||||
The MCP server inherits the security properties of the REST API:
|
||||
|
||||
- **API key scoping**: The MCP server uses whatever API key you configure. If certctl gets API key scoping in a future release (per-resource or per-action permissions), the MCP server will automatically respect those restrictions.
|
||||
- **Audit trail**: Every tool call results in an HTTP request that is logged by the API audit middleware — actor, method, path, status, and latency are all recorded.
|
||||
- **Read-only usage**: Once API key scoping ships, you will be able to configure a restricted API key for read-only AI access. Until then, be aware that the AI can call write endpoints (create, update, delete, revoke) if the API key permits it.
|
||||
- **No private key exposure**: The MCP server never sees or transmits private keys — the same architectural guarantee as the REST API.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
**"MCP server not connecting"**
|
||||
Check that `CERTCTL_SERVER_URL` is reachable from where the MCP binary runs. Try `curl $CERTCTL_SERVER_URL/health` to verify.
|
||||
|
||||
**"401 Unauthorized on every tool call"**
|
||||
Your `CERTCTL_API_KEY` is missing or wrong. Check the key matches what the certctl server expects.
|
||||
|
||||
**"Tool calls return empty results"**
|
||||
The certctl server might have no data. Start the demo stack (`docker compose -f deploy/docker-compose.yml up -d --build`) to load the demo seed data, or check that your database has records.
|
||||
|
||||
## What's Next
|
||||
|
||||
- [Quick Start](quickstart.md) — Get certctl running locally
|
||||
- [OpenAPI Spec](openapi.md) — Full API reference and SDK generation
|
||||
- [Architecture](architecture.md) — System design deep dive
|
||||
- [Concepts](concepts.md) — Certificate lifecycle fundamentals
|
||||
@@ -0,0 +1,191 @@
|
||||
# OpenAPI Specification Guide
|
||||
|
||||
certctl ships with a complete OpenAPI 3.1 specification at `api/openapi.yaml`. This spec documents all 78 API operations currently specified, every request/response schema, pagination conventions, authentication requirements, and error formats. It's the single source of truth for the documented REST API. (Note: The spec will be updated to include 7 additional certificate discovery endpoints from M18b.)
|
||||
|
||||
This guide covers how to use the spec for API exploration, client SDK generation, and integration testing.
|
||||
|
||||
## Where to Find It
|
||||
|
||||
The spec lives at `api/openapi.yaml` in the repository root. It's versioned alongside the code and updated with every API change.
|
||||
|
||||
```bash
|
||||
# View the spec
|
||||
cat api/openapi.yaml
|
||||
|
||||
# Count operations
|
||||
grep "operationId:" api/openapi.yaml | wc -l
|
||||
# 78 (includes health + ready; the 7 discovery endpoints are not counted until the spec is updated)
|
||||
```
|
||||
|
||||
## Viewing with Swagger UI
|
||||
|
||||
The fastest way to explore the API interactively is Swagger UI. Run it as a Docker container pointing at the spec:
|
||||
|
||||
```bash
|
||||
# From the certctl repo root
|
||||
docker run -p 8080:8080 \
|
||||
-e SWAGGER_JSON=/spec/openapi.yaml \
|
||||
-v $(pwd)/api:/spec \
|
||||
swaggerapi/swagger-ui
|
||||
```
|
||||
|
||||
Open http://localhost:8080 to see the full API reference with "Try it out" buttons for every endpoint.
|
||||
|
||||
Alternatively, use Redoc for a cleaner read-only view:
|
||||
|
||||
```bash
|
||||
docker run -p 8080:80 \
|
||||
-e SPEC_URL=/spec/openapi.yaml \
|
||||
-v $(pwd)/api:/usr/share/nginx/html/spec \
|
||||
redocly/redoc
|
||||
```
|
||||
|
||||
## API Structure
|
||||
|
||||
The spec organizes endpoints into 16 tags:
|
||||
|
||||
| Tag | Endpoints | Description |
|
||||
|-----|-----------|-------------|
|
||||
| Certificates | 12 | CRUD, versions, renewal, deployment, revocation, deployments |
|
||||
| CRL & OCSP | 3 | JSON CRL, DER CRL per issuer, OCSP responder |
|
||||
| Issuers | 5 | CA connector management |
|
||||
| Targets | 5 | Deployment target management |
|
||||
| Agents | 7 | Registration, heartbeat, CSR submission, work polling |
|
||||
| Jobs | 5 | Job queue with approve/reject |
|
||||
| Policies | 5 | Policy rules and violations |
|
||||
| Profiles | 5 | Certificate enrollment profiles |
|
||||
| Teams | 5 | Team management |
|
||||
| Owners | 5 | Certificate owners |
|
||||
| Agent Groups | 5 | Dynamic agent grouping |
|
||||
| Audit | 2 | Immutable audit trail |
|
||||
| Notifications | 3 | Notification events |
|
||||
| Stats | 5 | Dashboard statistics |
|
||||
| Metrics | 1 | System metrics |
|
||||
| Health | 3 | Health, readiness, auth info |
|
||||
|
||||
## Authentication
|
||||
|
||||
The spec declares a `bearerAuth` security scheme applied globally. All endpoints under `/api/v1/` require a Bearer token by default:
|
||||
|
||||
```bash
|
||||
curl -H "Authorization: Bearer your-api-key" \
|
||||
http://localhost:8443/api/v1/certificates
|
||||
```
|
||||
|
||||
Three endpoints are exempt from auth (declared with `security: []` in the spec): `/health`, `/ready`, and `/api/v1/auth/info`. The auth info endpoint tells clients whether authentication is enabled and what type is required — useful for GUIs that need to show/hide a login screen.
|
||||
|
||||
## Pagination Convention
|
||||
|
||||
All list endpoints follow the same pagination pattern:
|
||||
|
||||
**Request parameters:**
|
||||
- `page` (integer, default 1) — page number
|
||||
- `per_page` (integer, default 50, max 500) — results per page
|
||||
|
||||
**Response envelope:**
|
||||
```json
|
||||
{
|
||||
"data": [...],
|
||||
"total": 150,
|
||||
"page": 1,
|
||||
"per_page": 50
|
||||
}
|
||||
```
|
||||
|
||||
Certificates also support cursor-based pagination for large datasets:
|
||||
- `cursor` (string) — opaque cursor token from previous response
|
||||
- `page_size` (integer) — results per page when using cursor mode
|
||||
|
||||
## Generating Client SDKs
|
||||
|
||||
The OpenAPI spec can generate typed client libraries for any language. Here are examples using common generators:
|
||||
|
||||
### TypeScript (openapi-typescript-codegen)
|
||||
|
||||
```bash
|
||||
npx openapi-typescript-codegen \
|
||||
--input api/openapi.yaml \
|
||||
--output src/generated/certctl \
|
||||
--client axios
|
||||
```
|
||||
|
||||
### Python (openapi-python-client)
|
||||
|
||||
```bash
|
||||
pip install openapi-python-client
|
||||
openapi-python-client generate --path api/openapi.yaml
|
||||
```
|
||||
|
||||
### Go (oapi-codegen)
|
||||
|
||||
```bash
|
||||
go install github.com/oapi-codegen/oapi-codegen/v2/cmd/oapi-codegen@latest
|
||||
oapi-codegen -generate types,client -package certctl api/openapi.yaml > certctl_client.go
|
||||
```
|
||||
|
||||
### Java (OpenAPI Generator)
|
||||
|
||||
```bash
|
||||
npx @openapitools/openapi-generator-cli generate \
|
||||
-i api/openapi.yaml \
|
||||
-g java \
|
||||
-o generated/java-client
|
||||
```
|
||||
|
||||
## Validating the Spec
|
||||
|
||||
Verify the spec is valid OpenAPI 3.1:
|
||||
|
||||
```bash
|
||||
# Using spectral (recommended)
|
||||
npx @stoplight/spectral-cli lint api/openapi.yaml
|
||||
|
||||
# Using swagger-cli
|
||||
npx @apidevtools/swagger-cli validate api/openapi.yaml
|
||||
```
|
||||
|
||||
## Using with Postman
|
||||
|
||||
Import the spec directly into Postman:
|
||||
|
||||
1. Open Postman → Import → File → select `api/openapi.yaml`
|
||||
2. Postman creates a collection with all 78 documented operations organized by tag
|
||||
3. Set the `baseUrl` variable to `http://localhost:8443`
|
||||
4. Add an `Authorization: Bearer your-api-key` header to the collection
|
||||
|
||||
## Key Schemas
|
||||
|
||||
The spec defines typed schemas for all domain objects. Key schemas to know:
|
||||
|
||||
| Schema | Description |
|
||||
|--------|-------------|
|
||||
| `ManagedCertificate` | Core certificate record with status, expiry, owner, tags, profile |
|
||||
| `CertificateVersion` | Individual cert version with PEM, serial, fingerprint, validity |
|
||||
| `Agent` | Agent with heartbeat, metadata (OS, arch, IP, version), capabilities |
|
||||
| `Job` | Job record with type, status (7 states), certificate/target references |
|
||||
| `PolicyRule` | Policy with type (5 types), config, severity, enabled state |
|
||||
| `CertificateProfile` | Enrollment profile with allowed key types, max TTL, constraints |
|
||||
| `AuditEvent` | Immutable audit record with actor, action, resource, timestamp |
|
||||
| `RevocationReason` | RFC 5280 reason code enum (8 values) |
|
||||
| `DashboardSummary` | Aggregate stats (total certs, expiring, agents, jobs) |
|
||||
|
||||
## Integration Testing
|
||||
|
||||
Use the spec to generate contract tests that verify the API matches the spec:
|
||||
|
||||
```bash
|
||||
# Using schemathesis for fuzz testing against the spec
|
||||
pip install schemathesis
|
||||
schemathesis run api/openapi.yaml \
|
||||
--base-url http://localhost:8443 \
|
||||
--header "Authorization: Bearer your-api-key"
|
||||
```
|
||||
|
||||
This sends randomized valid requests to every endpoint and verifies the responses match the declared schemas.
|
||||
|
||||
## What's Next
|
||||
|
||||
- [MCP Server Guide](mcp.md) — AI-native access to the certctl API
|
||||
- [Quick Start](quickstart.md) — Get certctl running locally
|
||||
- [Connector Guide](connectors.md) — Build custom issuer and target connectors
|
||||
- [Architecture](architecture.md) — System design deep dive
|
||||
@@ -1,9 +1,35 @@
|
||||
# Quick Start Guide
|
||||
|
||||
Get certctl running locally and managing certificates in under 5 minutes.
|
||||
Certificate lifespans are dropping to **47 days by 2029**. At that cadence, a team managing 100 certificates is processing about 15 renewals per week (100 certificates ÷ 47 days × 7) — every week, forever. Manual processes break. certctl automates the entire lifecycle: issuance, renewal, deployment, revocation, and audit — with zero human intervention.
|
||||
|
||||
This guide gets you running in 5 minutes and walks you through everything certctl does.
|
||||
|
||||
New to certificates? Read the [Concepts Guide](concepts.md) first — it explains TLS, CAs, and private keys in plain language.
|
||||
|
||||
## Contents
|
||||
|
||||
1. [Prerequisites](#prerequisites)
|
||||
2. [Start Everything](#start-everything)
|
||||
3. [Open the Dashboard](#open-the-dashboard)
|
||||
4. [Explore the API](#explore-the-api)
|
||||
- [Core operations](#core-operations)
|
||||
- [Sorting, filtering, and pagination](#sorting-filtering-and-pagination)
|
||||
- [Stats and metrics](#stats-and-metrics)
|
||||
5. [Create Your First Certificate](#create-your-first-certificate)
|
||||
- [Revoke a certificate](#revoke-a-certificate)
|
||||
- [Interactive approval workflow](#interactive-approval-workflow)
|
||||
6. [Certificate Discovery](#certificate-discovery)
|
||||
- [Filesystem discovery (agent-based)](#filesystem-discovery-agent-based)
|
||||
- [Network discovery (agentless)](#network-discovery-agentless)
|
||||
- [Triage discovered certificates](#triage-discovered-certificates)
|
||||
7. [CLI Tool](#cli-tool)
|
||||
8. [MCP Server (AI Integration)](#mcp-server-ai-integration)
|
||||
9. [Demo Data Reference](#demo-data-reference)
|
||||
10. [Dashboard Demo Mode](#dashboard-demo-mode)
|
||||
11. [Presenting to Stakeholders](#presenting-to-stakeholders)
|
||||
12. [Tear Down](#tear-down)
|
||||
13. [What's Next](#whats-next)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
You need **Docker** and **Docker Compose** installed. That's it.
|
||||
@@ -17,13 +43,15 @@ On Linux, follow the official Docker install guide for your distribution.
|
||||
|
||||
## Start Everything
|
||||
|
||||
### Docker Compose (Quick Start)
|
||||
|
||||
```bash
|
||||
git clone https://github.com/shankar0123/certctl.git
|
||||
cd certctl
|
||||
docker compose -f deploy/docker-compose.yml up -d --build
|
||||
```
|
||||
|
||||
The `--build` flag is important — it builds the server image including the React frontend. Without it, Docker may use a stale cached image that doesn't include the dashboard.
|
||||
The `--build` flag builds the server image including the React frontend. Without it, Docker may use a stale cached image.
|
||||
|
||||
**For production deployments**, copy `deploy/.env.example` to `deploy/.env` and customize the credentials:
|
||||
```bash
|
||||
@@ -32,7 +60,23 @@ cp deploy/.env.example deploy/.env
|
||||
docker compose -f deploy/docker-compose.yml up -d --build
|
||||
```
|
||||
|
||||
Wait about 30 seconds for PostgreSQL to initialize and the server to boot. Check that everything is healthy:
|
||||
### Kubernetes with Helm
|
||||
|
||||
For production deployments on Kubernetes, use the Helm chart:
|
||||
|
||||
```bash
|
||||
helm install certctl deploy/helm/certctl/ \
|
||||
--create-namespace --namespace certctl \
|
||||
--set server.auth.apiKey="your-secure-api-key" \
|
||||
--set postgresql.auth.password="your-db-password" \
|
||||
--set ingress.enabled=true \
|
||||
--set ingress.hosts[0].host="certctl.example.com" \
|
||||
--set ingress.hosts[0].tls=true
|
||||
```
|
||||
|
||||
The chart includes: server Deployment (with configurable replicas, health probes, security context), PostgreSQL StatefulSet with persistent volumes, agent DaemonSet (one agent per infrastructure node), optional Ingress with TLS, and ServiceAccount with RBAC. All certctl configuration options are exposed in `values.yaml` — customize issuer settings, target connectors, scheduler intervals, and notifier credentials there.
|
||||
|
||||
Wait about 30 seconds for PostgreSQL to initialize, then verify:
|
||||
|
||||
```bash
|
||||
docker compose -f deploy/docker-compose.yml ps
|
||||
@@ -46,7 +90,6 @@ certctl-server Up (healthy)
|
||||
certctl-agent Up
|
||||
```
|
||||
|
||||
Verify the server responds:
|
||||
```bash
|
||||
curl http://localhost:8443/health
|
||||
```
|
||||
@@ -58,98 +101,129 @@ curl http://localhost:8443/health
|
||||
|
||||
Open **http://localhost:8443** in your browser.
|
||||
|
||||
The dashboard comes pre-loaded with 14 demo certificates across multiple teams, environments, and statuses. You'll see expiring certs, expired certs, active certs, failed renewals — a realistic snapshot of what a certificate inventory looks like in a real organization.
|
||||
> **Note:** The Docker Compose demo runs with authentication disabled (`CERTCTL_AUTH_TYPE=none`) so you can explore immediately. For production, set `CERTCTL_AUTH_TYPE=api-key` and `CERTCTL_AUTH_SECRET=<your-secret>` in your environment, then pass `Authorization: Bearer <your-secret>` on all API requests. The dashboard will prompt for your API key on first load.
|
||||
>
|
||||
> **Key rotation:** `CERTCTL_AUTH_SECRET` accepts comma-separated keys (e.g., `CERTCTL_AUTH_SECRET=new-key,old-key`). Both keys are valid simultaneously, enabling zero-downtime rotation: add the new key, roll clients over, then remove the old key.
|
||||
|
||||
Explore the sidebar: Certificates, Agents, Policies, Jobs, Audit Trail, Notifications. Everything you see in the dashboard is backed by the REST API.
|
||||
The dashboard comes pre-loaded with 15 demo certificates across multiple teams, environments, and statuses — expiring certs, expired certs, active certs, failed renewals. A realistic snapshot of what certificate management looks like in a real organization.
|
||||
|
||||
### What you're looking at
|
||||
|
||||
The main dashboard shows total certificates, how many are expiring soon, how many have expired, the renewal success rate, and four charts: an **expiration heatmap** (90-day weekly buckets), **renewal success rate trends** (30-day line chart), **certificate status distribution** (donut chart), and **issuance rate** (30-day bar chart).
|
||||
|
||||
Explore the sidebar: Certificates, Agents, Policies, Jobs, Audit Trail, Notifications, Profiles, Teams, Owners, Agent Groups, Fleet Overview, Short-Lived Credentials, Discovery, and Network Scans.
|
||||
|
||||
### Scenarios to walk through
|
||||
|
||||
**"We're about to have an outage"** — Filter certificates by status → Expiring. You'll see `auth-production` (12 days), `cdn-production` (8 days), and `mail-production` (5 days). At 47-day lifespans, this is every other week. certctl catches these automatically and triggers renewal before they expire.
|
||||
|
||||
**"A renewal failed"** — Look at `vpn-production` — status: Failed. Click it to see the audit trail showing the ACME challenge failure after 3 retry attempts. The system sent a webhook notification to the ops channel. No one had to notice manually.
|
||||
|
||||
**"Who owns this cert?"** — Click any certificate. Owner, team, environment, tags. Clear accountability. Notifications route to the owner's email automatically.
|
||||
|
||||
**"Can I revoke a compromised cert?"** — Click any active certificate, then "Revoke." A modal appears with RFC 5280 reason codes (Key Compromise, Superseded, Cessation of Operation). After revocation, CRL and OCSP are served automatically — clients stop trusting the cert immediately.
|
||||
|
||||
**"What about certificates already in production?"** — Click "Discovery" in the sidebar. The demo comes pre-loaded with 9 discovered certificates — some found by agents scanning filesystems, some found by the server probing TLS endpoints on the network. You'll see Unmanaged certs waiting for triage (including an expired printer cert and an expiring switch management cert), certs already linked to managed inventory, and one that was dismissed. Claim unmanaged certs to bring them under automation, or dismiss them. Click "Network Scans" to see the 3 configured scan targets with recent scan results.
|
||||
|
||||
**"I need to approve a renewal before it proceeds"** — Click "Jobs" in the sidebar. You'll see an amber banner: "2 jobs awaiting approval." These are renewal jobs for `auth-production` and `payments-production` that require human sign-off before proceeding. Click Approve or Reject with a reason — the decision is recorded in the audit trail.
|
||||
|
||||
**"Show me the agent fleet"** — Click "Agents." Four agents online, one offline. Click "Fleet Overview" for OS/architecture grouping, version distribution, and per-platform listing. Agents generate ECDSA P-256 keys locally — private keys never leave your infrastructure.
|
||||
|
||||
**"What about bulk operations?"** — On the Certificates page, select multiple certificates with checkboxes. A bulk action bar appears: trigger renewal, revoke with reason codes, or reassign ownership — all with progress tracking. At 47-day lifespans with hundreds of certs, bulk operations aren't optional.
|
||||
|
||||
**"Short-lived credentials?"** — Click "Short-Lived Credentials" in the sidebar. You'll see live countdown timers for certificates with a TTL under 1 hour, auto-refreshing every 10 seconds. These are for service-to-service auth where rapid expiry replaces revocation.
|
||||
|
||||
## Explore the API
|
||||
|
||||
The dashboard reads from the same REST API you can call directly. All endpoints live under `/api/v1/` and return JSON.
|
||||
Everything you see in the dashboard is backed by the REST API. All endpoints live under `/api/v1/` and return JSON.
|
||||
|
||||
### List all certificates
|
||||
### Core operations
|
||||
|
||||
```bash
|
||||
# List all certificates
|
||||
curl -s http://localhost:8443/api/v1/certificates | jq .
|
||||
```
|
||||
|
||||
The response has this shape:
|
||||
```json
|
||||
{
|
||||
"data": [
|
||||
{
|
||||
"id": "mc-api-prod",
|
||||
"name": "API Production",
|
||||
"common_name": "api.example.com",
|
||||
"sans": ["api.example.com", "api-v2.example.com"],
|
||||
"environment": "production",
|
||||
"owner_id": "o-alice",
|
||||
"team_id": "t-platform",
|
||||
"issuer_id": "iss-local",
|
||||
"status": "Active",
|
||||
"expires_at": "2026-05-28T00:00:00Z",
|
||||
"tags": {"service": "api-gateway", "tier": "critical"},
|
||||
"created_at": "2026-03-14T00:00:00Z",
|
||||
"updated_at": "2026-03-14T00:00:00Z"
|
||||
}
|
||||
],
|
||||
"total": 14,
|
||||
"page": 1,
|
||||
"per_page": 50
|
||||
}
|
||||
```
|
||||
|
||||
### Filter by status
|
||||
|
||||
```bash
|
||||
# Get only expiring certificates
|
||||
# Filter by status
|
||||
curl -s "http://localhost:8443/api/v1/certificates?status=Expiring" | jq .
|
||||
|
||||
# Get only production certificates
|
||||
# Filter by environment
|
||||
curl -s "http://localhost:8443/api/v1/certificates?environment=production" | jq .
|
||||
```
|
||||
|
||||
### Get a specific certificate
|
||||
|
||||
```bash
|
||||
# Get a specific certificate
|
||||
curl -s http://localhost:8443/api/v1/certificates/mc-api-prod | jq .
|
||||
```
|
||||
|
||||
### List agents
|
||||
# Get deployment targets for a certificate
|
||||
curl -s http://localhost:8443/api/v1/certificates/mc-api-prod/deployments | jq .
|
||||
|
||||
```bash
|
||||
# List agents
|
||||
curl -s http://localhost:8443/api/v1/agents | jq .
|
||||
```
|
||||
|
||||
### Check agent pending work
|
||||
# Check agent pending work
|
||||
curl -s http://localhost:8443/api/v1/agents/ag-web-prod/work | jq .
|
||||
|
||||
```bash
|
||||
# Replace with an actual agent ID from the list above
|
||||
curl -s http://localhost:8443/api/v1/agents/agent-nginx-prod/work | jq .
|
||||
```
|
||||
|
||||
### View audit trail
|
||||
|
||||
```bash
|
||||
# View audit trail
|
||||
curl -s http://localhost:8443/api/v1/audit | jq .
|
||||
```
|
||||
|
||||
### View policy rules
|
||||
|
||||
```bash
|
||||
# View policies and violations
|
||||
curl -s http://localhost:8443/api/v1/policies | jq .
|
||||
curl -s http://localhost:8443/api/v1/policies/pr-require-owner/violations | jq .
|
||||
|
||||
# Notifications
|
||||
curl -s http://localhost:8443/api/v1/notifications | jq .
|
||||
|
||||
# Profiles and agent groups
|
||||
curl -s http://localhost:8443/api/v1/profiles | jq .
|
||||
curl -s http://localhost:8443/api/v1/agent-groups | jq .
|
||||
```
|
||||
|
||||
### View notifications
|
||||
### Sorting, filtering, and pagination
|
||||
|
||||
```bash
|
||||
curl -s http://localhost:8443/api/v1/notifications | jq .
|
||||
# Sort by expiration date (ascending)
|
||||
curl -s "http://localhost:8443/api/v1/certificates?sort=notAfter" | jq .
|
||||
|
||||
# Sort descending (prefix with -)
|
||||
curl -s "http://localhost:8443/api/v1/certificates?sort=-createdAt" | jq .
|
||||
|
||||
# Time-range filters (RFC3339)
|
||||
curl -s "http://localhost:8443/api/v1/certificates?expires_before=2026-05-01T00:00:00Z" | jq .
|
||||
curl -s "http://localhost:8443/api/v1/certificates?created_after=2026-03-01T00:00:00Z" | jq .
|
||||
|
||||
# Sparse fields — request only what you need
|
||||
curl -s "http://localhost:8443/api/v1/certificates?fields=id,common_name,status,expires_at" | jq .
|
||||
|
||||
# Cursor pagination — efficient for large inventories
|
||||
curl -s "http://localhost:8443/api/v1/certificates?page_size=5" | jq '{next_cursor: .next_cursor, count: (.data | length)}'
|
||||
curl -s "http://localhost:8443/api/v1/certificates?cursor=<next_cursor_value>&page_size=5" | jq .
|
||||
```
|
||||
|
||||
Supported sort fields: `notAfter`, `expiresAt`, `createdAt`, `updatedAt`, `commonName`, `name`, `status`, `environment`.
|
||||
|
||||
### Stats and metrics
|
||||
|
||||
```bash
|
||||
# Dashboard summary
|
||||
curl -s http://localhost:8443/api/v1/stats/summary | jq .
|
||||
|
||||
# Certificates by status
|
||||
curl -s http://localhost:8443/api/v1/stats/certificates-by-status | jq .
|
||||
|
||||
# Expiration timeline (next 90 days)
|
||||
curl -s "http://localhost:8443/api/v1/stats/expiration-timeline?days=90" | jq .
|
||||
|
||||
# Job trends (last 30 days)
|
||||
curl -s "http://localhost:8443/api/v1/stats/job-trends?days=30" | jq .
|
||||
|
||||
# JSON metrics
|
||||
curl -s http://localhost:8443/api/v1/metrics | jq .
|
||||
|
||||
# Prometheus format (for Prometheus, Grafana Agent, Datadog)
|
||||
curl -s http://localhost:8443/api/v1/metrics/prometheus
|
||||
```
|
||||
|
||||
## Create Your First Certificate
|
||||
|
||||
Let's create a new managed certificate from scratch using the API. This will create a certificate record that certctl will track, renew, and deploy.
|
||||
|
||||
### Step 1: Create a certificate
|
||||
Create a certificate record that certctl will track, renew, and deploy automatically.
|
||||
|
||||
```bash
|
||||
curl -s -X POST http://localhost:8443/api/v1/certificates \
|
||||
@@ -168,59 +242,213 @@ curl -s -X POST http://localhost:8443/api/v1/certificates \
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
The server returns the created certificate. Since we didn't include an `id` field, the server auto-generates one using the name and a timestamp:
|
||||
```json
|
||||
{
|
||||
"id": "My First Certificate-1710403200000000000",
|
||||
"name": "My First Certificate",
|
||||
"common_name": "myapp.example.com",
|
||||
"status": "Pending",
|
||||
"created_at": "2026-03-14T..."
|
||||
}
|
||||
```
|
||||
|
||||
Save the certificate ID (or provide your own `id` in the request body, e.g. `"id": "mc-my-first"`):
|
||||
```bash
|
||||
CERT_ID="<paste the id from the response>"
|
||||
```
|
||||
|
||||
### Step 2: Trigger renewal
|
||||
|
||||
Trigger renewal:
|
||||
```bash
|
||||
curl -s -X POST http://localhost:8443/api/v1/certificates/$CERT_ID/renew | jq .
|
||||
```
|
||||
|
||||
This creates a renewal job that will be processed by the scheduler.
|
||||
|
||||
### Step 3: Check the certificate
|
||||
|
||||
Check the result:
|
||||
```bash
|
||||
curl -s http://localhost:8443/api/v1/certificates/$CERT_ID | jq .
|
||||
```
|
||||
|
||||
### Step 4: Check the audit trail
|
||||
|
||||
```bash
|
||||
curl -s http://localhost:8443/api/v1/audit | jq '.data[0:3]'
|
||||
```
|
||||
|
||||
Refresh the dashboard at http://localhost:8443 — your new certificate appears in the inventory.
|
||||
|
||||
## Understanding the Demo Data
|
||||
### Revoke a certificate
|
||||
|
||||
The demo comes pre-loaded with realistic data so you can explore certctl's features immediately:
|
||||
When a private key is compromised or a service is decommissioned:
|
||||
|
||||
```bash
|
||||
curl -s -X POST http://localhost:8443/api/v1/certificates/$CERT_ID/revoke \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"reason": "superseded"}' | jq .
|
||||
```
|
||||
|
||||
Supported RFC 5280 reason codes: `unspecified`, `keyCompromise`, `caCompromise`, `affiliationChanged`, `superseded`, `cessationOfOperation`, `certificateHold`, `privilegeWithdrawn`.
|
||||
|
||||
Confirm via CRL:
|
||||
```bash
|
||||
curl -s http://localhost:8443/api/v1/crl | jq .
|
||||
```
|
||||
|
||||
### Interactive approval workflow
|
||||
|
||||
Use the approval workflow for high-value certificates where you want human oversight. The demo includes 2 pre-seeded jobs in `AwaitingApproval` status (for `auth-production` and `payments-production`). Open **Jobs** in the sidebar and you'll see the amber "Pending Approval" banner immediately.
|
||||
|
||||
```bash
|
||||
# List jobs awaiting approval (demo includes 2)
|
||||
curl -s "http://localhost:8443/api/v1/jobs?status=AwaitingApproval" | jq '.data[] | {id, certificate_id, status}'
|
||||
|
||||
# Approve a pending job
|
||||
curl -s -X POST http://localhost:8443/api/v1/jobs/JOB_ID/approve \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"reason": "Approved for production deployment"}' | jq .
|
||||
|
||||
# Reject a pending job
|
||||
curl -s -X POST http://localhost:8443/api/v1/jobs/JOB_ID/reject \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"reason": "Key type does not meet compliance requirements"}' | jq .
|
||||
```
|
||||
|
||||
## Certificate Discovery
|
||||
|
||||
Find certificates already running in your infrastructure — ones you didn't issue through certctl.
|
||||
|
||||
The demo environment comes pre-loaded with 9 discovered certificates (from agent filesystem scans and server-side network scans), 3 network scan targets, and recent scan history. Open **Discovery** and **Network Scans** in the sidebar to see the triage workflow immediately.
|
||||
|
||||
### Filesystem discovery (agent-based)
|
||||
|
||||
```bash
|
||||
# Configure agent to scan directories
|
||||
export CERTCTL_DISCOVERY_DIRS="/etc/nginx/certs,/etc/ssl/certs,/var/lib/certs"
|
||||
# Agent scans on startup + every 6 hours
|
||||
```
|
||||
|
||||
### Network discovery (agentless)
|
||||
|
||||
```bash
|
||||
# Enable network scanning
|
||||
export CERTCTL_NETWORK_SCAN_ENABLED=true
|
||||
|
||||
# Create a scan target
|
||||
curl -s -X POST http://localhost:8443/api/v1/network-scan-targets \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"name": "Internal Network",
|
||||
"cidrs": ["10.0.1.0/24"],
|
||||
"ports": [443, 8443],
|
||||
"enabled": true,
|
||||
"scan_interval_hours": 6,
|
||||
"timeout_ms": 5000
|
||||
}' | jq .
|
||||
|
||||
# Trigger an immediate scan
|
||||
curl -s -X POST http://localhost:8443/api/v1/network-scan-targets/nst-internal-network/scan | jq .
|
||||
```
|
||||
|
||||
### Triage discovered certificates
|
||||
|
||||
```bash
|
||||
# List discovered certs
|
||||
curl -s "http://localhost:8443/api/v1/discovered-certificates?agent_id=agent-nginx-prod" | jq .
|
||||
|
||||
# Summary counts
|
||||
curl -s http://localhost:8443/api/v1/discovery-summary | jq .
|
||||
|
||||
# Claim a discovered cert (bring under management)
|
||||
curl -s -X POST "http://localhost:8443/api/v1/discovered-certificates/DISCOVERY_ID/claim" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"managed_certificate_id": "mc-api-prod"}' | jq .
|
||||
```
|
||||
|
||||
## CLI Tool
|
||||
|
||||
```bash
|
||||
cd cmd/cli && go build -o certctl-cli .
|
||||
|
||||
export CERTCTL_SERVER_URL="http://localhost:8443"
|
||||
export CERTCTL_API_KEY="test-key-123"
|
||||
|
||||
./certctl-cli certs list # List certificates
|
||||
./certctl-cli certs get mc-api-prod # Certificate details
|
||||
./certctl-cli certs renew mc-api-prod # Trigger renewal
|
||||
./certctl-cli certs revoke mc-api-prod --reason keyCompromise
|
||||
./certctl-cli agents list # List agents
|
||||
./certctl-cli jobs list # List jobs
|
||||
./certctl-cli import /path/to/certs.pem # Bulk import
|
||||
./certctl-cli status # Health + stats
|
||||
```
|
||||
|
||||
## Scheduled Certificate Digest Emails
|
||||
|
||||
Enable automatic HTML digest emails with certificate stats, expiration timeline, and job health:
|
||||
|
||||
```bash
|
||||
# Set SMTP configuration
|
||||
export CERTCTL_SMTP_HOST=smtp.gmail.com
|
||||
export CERTCTL_SMTP_PORT=587
|
||||
export CERTCTL_SMTP_USERNAME=admin@example.com
|
||||
export CERTCTL_SMTP_PASSWORD=your-app-password
|
||||
export CERTCTL_SMTP_FROM_ADDRESS=certctl@example.com
|
||||
export CERTCTL_SMTP_USE_TLS=true
|
||||
|
||||
# Enable digest and set recipients
|
||||
export CERTCTL_DIGEST_ENABLED=true
|
||||
export CERTCTL_DIGEST_INTERVAL=24h
|
||||
export CERTCTL_DIGEST_RECIPIENTS=ops@example.com,security@example.com
|
||||
```
|
||||
|
||||
Preview the digest HTML before enabling scheduled delivery:
|
||||
```bash
|
||||
curl http://localhost:8443/api/v1/digest/preview | jq '.html' | grep -o '<html>' # Shows HTML is ready
|
||||
|
||||
# Trigger a digest send immediately (outside of schedule)
|
||||
curl -X POST http://localhost:8443/api/v1/digest/send
|
||||
```
|
||||
|
||||
If no recipients are configured (`CERTCTL_DIGEST_RECIPIENTS` is empty), the digest falls back to certificate owner emails. Digests include total certificates, expiring soon, expired, active agents, completed/failed jobs (30-day summary), and a table of expiring certs color-coded by urgency (7/14/30 days).
|
||||
|
||||
## MCP Server (AI Integration)
|
||||
|
||||
```bash
|
||||
cd cmd/mcp-server && go build -o mcp-server .
|
||||
|
||||
export CERTCTL_SERVER_URL="http://localhost:8443"
|
||||
export CERTCTL_API_KEY="test-key-123"
|
||||
|
||||
./mcp-server
|
||||
```
|
||||
|
||||
Exposes 78 MCP tools covering the REST API via stdio transport. Ask Claude: "What certificates are expiring in the next 30 days?", "Revoke the payments cert due to key compromise", "Show me the audit trail."
|
||||
|
||||
## Demo Data Reference
|
||||
|
||||
| Resource | Count | Examples |
|
||||
|----------|-------|---------|
|
||||
| Teams | 5 | Platform, Security, Payments, Frontend, Data |
|
||||
| Owners | 5 | Alice, Bob, Carol, Dave, Eve |
|
||||
| Issuers | 3 | Local Dev CA, Let's Encrypt Staging, DigiCert |
|
||||
| Agents | 5 | nginx-prod, nginx-staging, f5-prod, iis-prod, data-agent |
|
||||
| Issuers | 4 | Local Dev CA, Let's Encrypt Staging, step-ca Internal, DigiCert (disabled) |
|
||||
| Agents | 6 | ag-web-prod, ag-web-staging, ag-lb-prod, ag-iis-prod, ag-data-prod, server-scanner (network discovery) |
|
||||
| Targets | 5 | NGINX (prod/staging/data), F5 LB, IIS |
|
||||
| Certificates | 14 | Various statuses: Active, Expiring, Expired, Failed |
|
||||
| Certificates | 15 | Various statuses: Active, Expiring, Expired, Failed, Wildcard |
|
||||
| Discovered Certs | 9 | 5 Unmanaged (filesystem + network), 2 Managed (linked), 1 Dismissed, network-discovered expired printer cert |
|
||||
| Discovery Scans | 3 | Agent filesystem scans + network TLS scan |
|
||||
| Network Scan Targets | 3 | DC1 Web Servers, DC2 Application Tier, DMZ Public Endpoints |
|
||||
| Jobs (Approval) | 2 | AwaitingApproval renewal jobs for `auth-production` and `payments-production` |
|
||||
| Policies | 4 | Required owner, allowed environments, max lifetime, min renewal window |
|
||||
| Profiles | 4 | Standard TLS, Internal mTLS, Short-Lived, High Security |
|
||||
| Agent Groups | 5 | Linux agents, ARM agents, Production subnet, etc. |
|
||||
|
||||
Certificates have varied statuses so you can see what each state looks like in the dashboard: healthy certs with 45+ days remaining, certs about to expire (5-12 days), certs that already expired, and a failed renewal.
|
||||
## Dashboard Demo Mode
|
||||
|
||||
The dashboard works without a backend for screenshots and presentations:
|
||||
|
||||
```bash
|
||||
cd web && npm install && npm run dev
|
||||
# Dashboard at http://localhost:5173
|
||||
```
|
||||
|
||||
When the API is unreachable, the dashboard loads realistic mock data with a "Demo Mode" badge.
|
||||
|
||||
## Presenting to Stakeholders
|
||||
|
||||
A suggested 5-minute flow:
|
||||
|
||||
1. **Dashboard** — "Certificate inventory at a glance. Real-time charts show expiration trends and renewal health."
|
||||
2. **Expiring certs** — "These three would have caused outages. At 47-day lifespans, this happens every other week."
|
||||
3. **Certificate detail** — "Full lifecycle: who owns it, where it's deployed, deployment timeline, version history with rollback."
|
||||
4. **Revocation** — "One click revokes with an RFC 5280 reason code. CRL and OCSP served automatically."
|
||||
5. **Failed renewal** — "System tried 3 times, then alerted the team via Slack, Teams, PagerDuty, or OpsGenie."
|
||||
6. **Agent fleet** — "Agents handle key generation locally (ECDSA P-256). Private keys never leave your infrastructure."
|
||||
7. **Discovery** — "Agents scan filesystems; the server probes TLS endpoints. We find what you're not managing yet."
|
||||
8. **Bulk operations** — "Select multiple certs, renew or revoke in bulk. At 47-day lifespans with hundreds of certs, this is essential."
|
||||
9. **Audit trail** — "Every action recorded. Export to CSV/JSON for compliance."
|
||||
10. **CLI + MCP** — "Terminal users get `certctl-cli`. AI assistants get MCP integration. Everything is API-first."
|
||||
|
||||
## Tear Down
|
||||
|
||||
@@ -228,11 +456,11 @@ Certificates have varied statuses so you can see what each state looks like in t
|
||||
docker compose -f deploy/docker-compose.yml down -v
|
||||
```
|
||||
|
||||
The `-v` flag removes the PostgreSQL data volume so you get a clean slate next time.
|
||||
The `-v` flag removes the PostgreSQL data volume for a clean slate.
|
||||
|
||||
## What's Next
|
||||
|
||||
- **[Advanced Demo](demo-advanced.md)** — Issue a real certificate via the Local CA and watch it appear in the dashboard
|
||||
- **[Demo Walkthrough](demo-guide.md)** — Guided 5-minute stakeholder presentation
|
||||
- **[Advanced Demo](demo-advanced.md)** — Issue a real certificate via the Local CA end-to-end
|
||||
- **[Architecture](architecture.md)** — How the control plane, agents, and connectors work together
|
||||
- **[Connector Guide](connectors.md)** — Build custom connectors for your infrastructure
|
||||
- **[Concepts Guide](concepts.md)** — TLS certificates, CAs, and private keys explained from scratch
|
||||
|
||||
|
After Width: | Height: | Size: 755 KiB |
|
After Width: | Height: | Size: 229 KiB |
|
After Width: | Height: | Size: 296 KiB |
|
After Width: | Height: | Size: 160 KiB |
|
After Width: | Height: | Size: 182 KiB |
|
After Width: | Height: | Size: 179 KiB |
|
After Width: | Height: | Size: 293 KiB |
|
After Width: | Height: | Size: 166 KiB |
|
After Width: | Height: | Size: 192 KiB |
|
After Width: | Height: | Size: 162 KiB |
|
After Width: | Height: | Size: 154 KiB |
|
After Width: | Height: | Size: 150 KiB |
|
After Width: | Height: | Size: 148 KiB |
|
After Width: | Height: | Size: 179 KiB |
|
After Width: | Height: | Size: 120 KiB |
|
After Width: | Height: | Size: 340 KiB |
|
After Width: | Height: | Size: 179 KiB |
|
After Width: | Height: | Size: 160 KiB |
|
After Width: | Height: | Size: 340 KiB |
|
After Width: | Height: | Size: 296 KiB |
|
After Width: | Height: | Size: 229 KiB |
|
After Width: | Height: | Size: 182 KiB |
|
After Width: | Height: | Size: 162 KiB |
|
After Width: | Height: | Size: 179 KiB |
|
After Width: | Height: | Size: 293 KiB |
|
After Width: | Height: | Size: 150 KiB |
|
After Width: | Height: | Size: 166 KiB |
|
After Width: | Height: | Size: 192 KiB |
|
After Width: | Height: | Size: 120 KiB |
|
After Width: | Height: | Size: 154 KiB |
|
After Width: | Height: | Size: 148 KiB |
|
After Width: | Height: | Size: 438 KiB |
|
After Width: | Height: | Size: 404 KiB |
|
After Width: | Height: | Size: 700 KiB |
|
After Width: | Height: | Size: 680 KiB |
|
After Width: | Height: | Size: 500 KiB |
|
After Width: | Height: | Size: 432 KiB |
|
After Width: | Height: | Size: 399 KiB |
|
After Width: | Height: | Size: 454 KiB |
|
After Width: | Height: | Size: 615 KiB |
|
After Width: | Height: | Size: 396 KiB |
|
After Width: | Height: | Size: 414 KiB |