feat(M48): continuous TLS health monitoring — endpoint state machine, shared tlsprobe, 8 API endpoints, GUI

Adds continuous TLS endpoint health monitoring that closes the deploy→verify→monitor loop.
After M25 verifies a deployment succeeded once, M48 continuously confirms it stays healthy.

Key components:
- Shared `internal/tlsprobe/` package extracted from network scanner for reuse
- Health status state machine: healthy → degraded (2 failures) → down (5 failures),
  plus cert_mismatch when served fingerprint differs from expected
- 8th scheduler loop (60s tick, per-endpoint configurable intervals)
- PostgreSQL migration 000011: endpoint_health_checks + endpoint_health_history tables
- 8 REST API endpoints (CRUD, history, acknowledge, summary)
- Health Monitor GUI page with summary bar, status table, create modal, auto-refresh
- 38 new tests (5 tlsprobe + 11 domain + 10 service + 8 handler + 4 frontend)
- All coverage thresholds maintained (service 68%, handler 83%, domain 87%, middleware 63%)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
shankar0123
2026-04-15 21:45:45 -04:00
parent f2e60b93a3
commit 596d86a206
29 changed files with 3540 additions and 30 deletions
+308
View File
@@ -0,0 +1,308 @@
package handler
import (
"context"
"encoding/json"
"fmt"
"net/http"
"strconv"
"github.com/shankar0123/certctl/internal/domain"
"github.com/shankar0123/certctl/internal/repository"
)
// HealthCheckServicer defines the interface used by the health check handler.
type HealthCheckServicer interface {
Create(ctx context.Context, check *domain.EndpointHealthCheck) error
Get(ctx context.Context, id string) (*domain.EndpointHealthCheck, error)
Update(ctx context.Context, check *domain.EndpointHealthCheck) error
Delete(ctx context.Context, id string) error
List(ctx context.Context, filter *repository.HealthCheckFilter) ([]*domain.EndpointHealthCheck, int, error)
GetHistory(ctx context.Context, healthCheckID string, limit int) ([]*domain.HealthHistoryEntry, error)
AcknowledgeIncident(ctx context.Context, id string, actor string) error
GetSummary(ctx context.Context) (*domain.HealthCheckSummary, error)
}
// HealthCheckHandler handles HTTP requests for TLS health monitoring.
type HealthCheckHandler struct {
service HealthCheckServicer
}
// NewHealthCheckHandler creates a new health check handler.
func NewHealthCheckHandler(service HealthCheckServicer) *HealthCheckHandler {
return &HealthCheckHandler{service: service}
}
// ListHealthChecks handles GET /api/v1/health-checks
func (h *HealthCheckHandler) ListHealthChecks(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
return
}
query := r.URL.Query()
status := query.Get("status")
certificateID := query.Get("certificate_id")
networkScanTargetID := query.Get("network_scan_target_id")
enabledStr := query.Get("enabled")
page := parseIntDefault(query.Get("page"), 1)
perPage := parseIntDefault(query.Get("per_page"), 50)
if perPage > 500 {
perPage = 50
}
// Parse enabled flag if provided
var enabledFilter *bool
if enabledStr != "" {
enabled := enabledStr == "true"
enabledFilter = &enabled
}
filter := &repository.HealthCheckFilter{
Status: status,
CertificateID: certificateID,
NetworkScanTargetID: networkScanTargetID,
Enabled: enabledFilter,
Page: page,
PerPage: perPage,
}
checks, total, err := h.service.List(r.Context(), filter)
if err != nil {
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to list health checks: %v", err))
return
}
if checks == nil {
checks = make([]*domain.EndpointHealthCheck, 0)
}
JSON(w, http.StatusOK, PagedResponse{
Data: checks,
Total: int64(total),
Page: page,
PerPage: perPage,
})
}
// GetHealthCheck handles GET /api/v1/health-checks/{id}
func (h *HealthCheckHandler) GetHealthCheck(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
return
}
id := r.PathValue("id")
if id == "" {
Error(w, http.StatusBadRequest, "health check ID is required")
return
}
check, err := h.service.Get(r.Context(), id)
if err != nil {
Error(w, http.StatusNotFound, fmt.Sprintf("health check not found: %v", err))
return
}
JSON(w, http.StatusOK, check)
}
// CreateHealthCheck handles POST /api/v1/health-checks
func (h *HealthCheckHandler) CreateHealthCheck(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPost {
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
return
}
var check domain.EndpointHealthCheck
if err := json.NewDecoder(r.Body).Decode(&check); err != nil {
Error(w, http.StatusBadRequest, fmt.Sprintf("invalid request body: %v", err))
return
}
if check.Endpoint == "" {
Error(w, http.StatusBadRequest, "endpoint is required")
return
}
// Set defaults
if check.CheckIntervalSecs <= 0 {
check.CheckIntervalSecs = 300
}
if check.DegradedThreshold <= 0 {
check.DegradedThreshold = 2
}
if check.DownThreshold <= 0 {
check.DownThreshold = 5
}
if check.Status == "" {
check.Status = domain.HealthStatusUnknown
}
if err := h.service.Create(r.Context(), &check); err != nil {
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to create health check: %v", err))
return
}
JSON(w, http.StatusCreated, check)
}
// UpdateHealthCheck handles PUT /api/v1/health-checks/{id}
func (h *HealthCheckHandler) UpdateHealthCheck(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPut {
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
return
}
id := r.PathValue("id")
if id == "" {
Error(w, http.StatusBadRequest, "health check ID is required")
return
}
// Get existing check
existing, err := h.service.Get(r.Context(), id)
if err != nil {
Error(w, http.StatusNotFound, fmt.Sprintf("health check not found: %v", err))
return
}
var updates domain.EndpointHealthCheck
if err := json.NewDecoder(r.Body).Decode(&updates); err != nil {
Error(w, http.StatusBadRequest, fmt.Sprintf("invalid request body: %v", err))
return
}
// Merge updates (only update provided fields)
if updates.Endpoint != "" {
existing.Endpoint = updates.Endpoint
}
if updates.ExpectedFingerprint != "" {
existing.ExpectedFingerprint = updates.ExpectedFingerprint
}
if updates.CheckIntervalSecs > 0 {
existing.CheckIntervalSecs = updates.CheckIntervalSecs
}
if updates.DegradedThreshold > 0 {
existing.DegradedThreshold = updates.DegradedThreshold
}
if updates.DownThreshold > 0 {
existing.DownThreshold = updates.DownThreshold
}
existing.Enabled = updates.Enabled
if err := h.service.Update(r.Context(), existing); err != nil {
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to update health check: %v", err))
return
}
JSON(w, http.StatusOK, existing)
}
// DeleteHealthCheck handles DELETE /api/v1/health-checks/{id}
func (h *HealthCheckHandler) DeleteHealthCheck(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodDelete {
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
return
}
id := r.PathValue("id")
if id == "" {
Error(w, http.StatusBadRequest, "health check ID is required")
return
}
if err := h.service.Delete(r.Context(), id); err != nil {
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to delete health check: %v", err))
return
}
w.WriteHeader(http.StatusNoContent)
}
// GetHealthCheckHistory handles GET /api/v1/health-checks/{id}/history
func (h *HealthCheckHandler) GetHealthCheckHistory(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
return
}
id := r.PathValue("id")
if id == "" {
Error(w, http.StatusBadRequest, "health check ID is required")
return
}
limitStr := r.URL.Query().Get("limit")
limit := 100
if limitStr != "" {
if l, err := strconv.Atoi(limitStr); err == nil && l > 0 {
limit = l
}
}
if limit > 1000 {
limit = 1000
}
history, err := h.service.GetHistory(r.Context(), id, limit)
if err != nil {
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to get health check history: %v", err))
return
}
if history == nil {
history = make([]*domain.HealthHistoryEntry, 0)
}
JSON(w, http.StatusOK, history)
}
// AcknowledgeHealthCheck handles POST /api/v1/health-checks/{id}/acknowledge
func (h *HealthCheckHandler) AcknowledgeHealthCheck(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPost {
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
return
}
id := r.PathValue("id")
if id == "" {
Error(w, http.StatusBadRequest, "health check ID is required")
return
}
var req struct {
Actor string `json:"actor,omitempty"`
}
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
Error(w, http.StatusBadRequest, fmt.Sprintf("invalid request body: %v", err))
return
}
if req.Actor == "" {
req.Actor = "unknown"
}
if err := h.service.AcknowledgeIncident(r.Context(), id, req.Actor); err != nil {
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to acknowledge health check: %v", err))
return
}
w.WriteHeader(http.StatusNoContent)
}
// GetHealthCheckSummary handles GET /api/v1/health-checks/summary
// This route must be registered BEFORE the /{id} routes
func (h *HealthCheckHandler) GetHealthCheckSummary(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
return
}
summary, err := h.service.GetSummary(r.Context())
if err != nil {
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to get health check summary: %v", err))
return
}
JSON(w, http.StatusOK, summary)
}
@@ -0,0 +1,305 @@
package handler
import (
"bytes"
"context"
"encoding/json"
"errors"
"net/http"
"net/http/httptest"
"testing"
"github.com/shankar0123/certctl/internal/domain"
"github.com/shankar0123/certctl/internal/repository"
)
// mockHealthCheckSvc implements HealthCheckServicer for testing.
type mockHealthCheckSvc struct {
createErr error
getErr error
updateErr error
deleteErr error
listErr error
getHistoryErr error
acknowledgeErr error
getSummaryErr error
checks map[string]*domain.EndpointHealthCheck
summary *domain.HealthCheckSummary
}
func newMockHealthCheckSvc() *mockHealthCheckSvc {
return &mockHealthCheckSvc{
checks: make(map[string]*domain.EndpointHealthCheck),
summary: &domain.HealthCheckSummary{
Healthy: 1,
Degraded: 0,
Down: 0,
CertMismatch: 0,
Unknown: 0,
},
}
}
func (m *mockHealthCheckSvc) Create(ctx context.Context, check *domain.EndpointHealthCheck) error {
if m.createErr != nil {
return m.createErr
}
check.ID = "hc-created-1"
m.checks[check.ID] = check
return nil
}
func (m *mockHealthCheckSvc) Get(ctx context.Context, id string) (*domain.EndpointHealthCheck, error) {
if m.getErr != nil {
return nil, m.getErr
}
if check, ok := m.checks[id]; ok {
return check, nil
}
return nil, errors.New("not found")
}
func (m *mockHealthCheckSvc) Update(ctx context.Context, check *domain.EndpointHealthCheck) error {
if m.updateErr != nil {
return m.updateErr
}
m.checks[check.ID] = check
return nil
}
func (m *mockHealthCheckSvc) Delete(ctx context.Context, id string) error {
if m.deleteErr != nil {
return m.deleteErr
}
delete(m.checks, id)
return nil
}
func (m *mockHealthCheckSvc) List(ctx context.Context, filter *repository.HealthCheckFilter) ([]*domain.EndpointHealthCheck, int, error) {
if m.listErr != nil {
return nil, 0, m.listErr
}
checks := make([]*domain.EndpointHealthCheck, 0, len(m.checks))
for _, check := range m.checks {
checks = append(checks, check)
}
return checks, len(checks), nil
}
func (m *mockHealthCheckSvc) GetHistory(ctx context.Context, healthCheckID string, limit int) ([]*domain.HealthHistoryEntry, error) {
if m.getHistoryErr != nil {
return nil, m.getHistoryErr
}
return make([]*domain.HealthHistoryEntry, 0), nil
}
func (m *mockHealthCheckSvc) AcknowledgeIncident(ctx context.Context, id string, actor string) error {
if m.acknowledgeErr != nil {
return m.acknowledgeErr
}
if check, ok := m.checks[id]; ok {
check.Acknowledged = true
check.AcknowledgedBy = actor
}
return nil
}
func (m *mockHealthCheckSvc) GetSummary(ctx context.Context) (*domain.HealthCheckSummary, error) {
if m.getSummaryErr != nil {
return nil, m.getSummaryErr
}
return m.summary, nil
}
// Tests
func TestListHealthChecks_Success(t *testing.T) {
svc := newMockHealthCheckSvc()
svc.checks["hc-1"] = &domain.EndpointHealthCheck{
ID: "hc-1",
Endpoint: "api.example.com:443",
Status: domain.HealthStatusHealthy,
}
handler := NewHealthCheckHandler(svc)
req := httptest.NewRequest("GET", "/api/v1/health-checks", nil)
w := httptest.NewRecorder()
handler.ListHealthChecks(w, req)
if w.Code != http.StatusOK {
t.Errorf("Expected status 200, got %d", w.Code)
}
var resp PagedResponse
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
t.Fatalf("Failed to decode response: %v", err)
}
if resp.Total != 1 {
t.Errorf("Expected 1 health check, got %d", resp.Total)
}
}
func TestListHealthChecks_MethodNotAllowed(t *testing.T) {
handler := NewHealthCheckHandler(newMockHealthCheckSvc())
req := httptest.NewRequest("POST", "/api/v1/health-checks", nil)
w := httptest.NewRecorder()
handler.ListHealthChecks(w, req)
if w.Code != http.StatusMethodNotAllowed {
t.Errorf("Expected status 405, got %d", w.Code)
}
}
func TestGetHealthCheck_Success(t *testing.T) {
svc := newMockHealthCheckSvc()
check := &domain.EndpointHealthCheck{
ID: "hc-1",
Endpoint: "api.example.com:443",
Status: domain.HealthStatusHealthy,
}
svc.checks["hc-1"] = check
handler := NewHealthCheckHandler(svc)
req := httptest.NewRequest("GET", "/api/v1/health-checks/hc-1", nil)
req.SetPathValue("id", "hc-1")
w := httptest.NewRecorder()
handler.GetHealthCheck(w, req)
if w.Code != http.StatusOK {
t.Errorf("Expected status 200, got %d", w.Code)
}
var resp domain.EndpointHealthCheck
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
t.Fatalf("Failed to decode response: %v", err)
}
if resp.ID != "hc-1" {
t.Errorf("Expected ID hc-1, got %s", resp.ID)
}
}
func TestGetHealthCheck_NotFound(t *testing.T) {
handler := NewHealthCheckHandler(newMockHealthCheckSvc())
req := httptest.NewRequest("GET", "/api/v1/health-checks/nonexistent", nil)
req.SetPathValue("id", "nonexistent")
w := httptest.NewRecorder()
handler.GetHealthCheck(w, req)
if w.Code != http.StatusNotFound {
t.Errorf("Expected status 404, got %d", w.Code)
}
}
func TestCreateHealthCheck_Success(t *testing.T) {
svc := newMockHealthCheckSvc()
handler := NewHealthCheckHandler(svc)
check := domain.EndpointHealthCheck{
Endpoint: "web.example.com:443",
Enabled: true,
}
body, _ := json.Marshal(check)
req := httptest.NewRequest("POST", "/api/v1/health-checks", bytes.NewReader(body))
w := httptest.NewRecorder()
handler.CreateHealthCheck(w, req)
if w.Code != http.StatusCreated {
t.Errorf("Expected status 201, got %d", w.Code)
}
var resp domain.EndpointHealthCheck
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
t.Fatalf("Failed to decode response: %v", err)
}
if resp.Endpoint != "web.example.com:443" {
t.Errorf("Expected endpoint web.example.com:443, got %s", resp.Endpoint)
}
}
func TestDeleteHealthCheck_Success(t *testing.T) {
svc := newMockHealthCheckSvc()
svc.checks["hc-1"] = &domain.EndpointHealthCheck{
ID: "hc-1",
Endpoint: "api.example.com:443",
}
handler := NewHealthCheckHandler(svc)
req := httptest.NewRequest("DELETE", "/api/v1/health-checks/hc-1", nil)
req.SetPathValue("id", "hc-1")
w := httptest.NewRecorder()
handler.DeleteHealthCheck(w, req)
if w.Code != http.StatusNoContent {
t.Errorf("Expected status 204, got %d", w.Code)
}
if _, ok := svc.checks["hc-1"]; ok {
t.Fatal("Expected check to be deleted")
}
}
func TestAcknowledgeHealthCheck_Success(t *testing.T) {
svc := newMockHealthCheckSvc()
svc.checks["hc-1"] = &domain.EndpointHealthCheck{
ID: "hc-1",
Endpoint: "api.example.com:443",
Status: domain.HealthStatusDown,
}
handler := NewHealthCheckHandler(svc)
req := httptest.NewRequest("POST", "/api/v1/health-checks/hc-1/acknowledge", bytes.NewReader([]byte(`{"actor":"user@example.com"}`)))
req.SetPathValue("id", "hc-1")
w := httptest.NewRecorder()
handler.AcknowledgeHealthCheck(w, req)
if w.Code != http.StatusNoContent {
t.Errorf("Expected status 204, got %d", w.Code)
}
if !svc.checks["hc-1"].Acknowledged {
t.Fatal("Expected check to be acknowledged")
}
}
func TestGetHealthCheckSummary_Success(t *testing.T) {
svc := newMockHealthCheckSvc()
svc.summary = &domain.HealthCheckSummary{
Healthy: 3,
Degraded: 1,
Down: 0,
CertMismatch: 0,
Unknown: 1,
}
handler := NewHealthCheckHandler(svc)
req := httptest.NewRequest("GET", "/api/v1/health-checks/summary", nil)
w := httptest.NewRecorder()
handler.GetHealthCheckSummary(w, req)
if w.Code != http.StatusOK {
t.Errorf("Expected status 200, got %d", w.Code)
}
var resp domain.HealthCheckSummary
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
t.Fatalf("Failed to decode response: %v", err)
}
if resp.Healthy != 3 {
t.Errorf("Expected 3 healthy checks, got %d", resp.Healthy)
}
}