mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-11 11:18:51 +00:00
feat(M48): continuous TLS health monitoring — endpoint state machine, shared tlsprobe, 8 API endpoints, GUI
Adds continuous TLS endpoint health monitoring that closes the deploy→verify→monitor loop. After M25 verifies a deployment succeeded once, M48 continuously confirms it stays healthy. Key components: - Shared `internal/tlsprobe/` package extracted from network scanner for reuse - Health status state machine: healthy → degraded (2 failures) → down (5 failures), plus cert_mismatch when served fingerprint differs from expected - 8th scheduler loop (60s tick, per-endpoint configurable intervals) - PostgreSQL migration 000011: endpoint_health_checks + endpoint_health_history tables - 8 REST API endpoints (CRUD, history, acknowledge, summary) - Health Monitor GUI page with summary bar, status table, create modal, auto-refresh - 38 new tests (5 tlsprobe + 11 domain + 10 service + 8 handler + 4 frontend) - All coverage thresholds maintained (service 68%, handler 83%, domain 87%, middleware 63%) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -32,6 +32,7 @@ type Config struct {
|
||||
GoogleCAS GoogleCASConfig
|
||||
AWSACMPCA AWSACMPCAConfig
|
||||
Digest DigestConfig
|
||||
HealthCheck HealthCheckConfig
|
||||
Encryption EncryptionConfig
|
||||
}
|
||||
|
||||
@@ -319,6 +320,46 @@ type DigestConfig struct {
|
||||
Recipients []string
|
||||
}
|
||||
|
||||
// HealthCheckConfig contains configuration for continuous TLS health monitoring (M48).
|
||||
type HealthCheckConfig struct {
|
||||
// Enabled controls whether health checks are enabled.
|
||||
// Default: false.
|
||||
// Setting: CERTCTL_HEALTH_CHECK_ENABLED environment variable.
|
||||
Enabled bool
|
||||
|
||||
// CheckInterval is the main scheduler loop interval for polling due checks.
|
||||
// Default: 60 seconds. Each endpoint has its own check_interval_seconds.
|
||||
// Setting: CERTCTL_HEALTH_CHECK_INTERVAL environment variable.
|
||||
CheckInterval time.Duration
|
||||
|
||||
// DefaultInterval is the default probe interval in seconds for each endpoint (per-endpoint basis).
|
||||
// Default: 300 seconds (5 minutes).
|
||||
// Setting: CERTCTL_HEALTH_CHECK_DEFAULT_INTERVAL environment variable.
|
||||
DefaultInterval int
|
||||
|
||||
// DefaultTimeout is the default TLS connection timeout in milliseconds.
|
||||
// Default: 5000 milliseconds (5 seconds).
|
||||
// Setting: CERTCTL_HEALTH_CHECK_DEFAULT_TIMEOUT environment variable.
|
||||
DefaultTimeout int
|
||||
|
||||
// MaxConcurrent is the maximum number of concurrent TLS probes.
|
||||
// Default: 20.
|
||||
// Setting: CERTCTL_HEALTH_CHECK_MAX_CONCURRENT environment variable.
|
||||
MaxConcurrent int
|
||||
|
||||
// HistoryRetention controls how long probe history records are kept.
|
||||
// Default: 30 days. Older records are purged by the scheduler.
|
||||
// Setting: CERTCTL_HEALTH_CHECK_HISTORY_RETENTION environment variable.
|
||||
HistoryRetention time.Duration
|
||||
|
||||
// AutoCreate controls whether health checks are auto-created when:
|
||||
// - A deployment job completes with verification success
|
||||
// - A network scan target has health_check_enabled=true
|
||||
// Default: true.
|
||||
// Setting: CERTCTL_HEALTH_CHECK_AUTO_CREATE environment variable.
|
||||
AutoCreate bool
|
||||
}
|
||||
|
||||
// ACMEConfig contains ACME issuer connector configuration.
|
||||
type ACMEConfig struct {
|
||||
// DirectoryURL is the ACME directory URL for certificate issuance.
|
||||
@@ -678,6 +719,15 @@ func Load() (*Config, error) {
|
||||
Interval: getEnvDuration("CERTCTL_DIGEST_INTERVAL", 24*time.Hour),
|
||||
Recipients: getEnvList("CERTCTL_DIGEST_RECIPIENTS", nil),
|
||||
},
|
||||
HealthCheck: HealthCheckConfig{
|
||||
Enabled: getEnvBool("CERTCTL_HEALTH_CHECK_ENABLED", false),
|
||||
CheckInterval: getEnvDuration("CERTCTL_HEALTH_CHECK_INTERVAL", 60*time.Second),
|
||||
DefaultInterval: getEnvInt("CERTCTL_HEALTH_CHECK_DEFAULT_INTERVAL", 300),
|
||||
DefaultTimeout: getEnvInt("CERTCTL_HEALTH_CHECK_DEFAULT_TIMEOUT", 5000),
|
||||
MaxConcurrent: getEnvInt("CERTCTL_HEALTH_CHECK_MAX_CONCURRENT", 20),
|
||||
HistoryRetention: getEnvDuration("CERTCTL_HEALTH_CHECK_HISTORY_RETENTION", 30*24*time.Hour),
|
||||
AutoCreate: getEnvBool("CERTCTL_HEALTH_CHECK_AUTO_CREATE", true),
|
||||
},
|
||||
Encryption: EncryptionConfig{
|
||||
ConfigEncryptionKey: getEnv("CERTCTL_CONFIG_ENCRYPTION_KEY", ""),
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user