feat: add network certificate discovery (M21) and Prometheus metrics (M22)

M21 adds server-side active TLS scanning of CIDR ranges with concurrent
probing, sentinel agent pattern for pipeline reuse, and full CRUD API for
scan targets. M22 adds Prometheus exposition format endpoint alongside
existing JSON metrics. Comprehensive documentation audit updates all docs
to reflect 91 endpoints, 19 tables, 6 scheduler loops, and 900+ tests.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Shankar
2026-03-24 23:37:47 -04:00
parent 3dc76e0b87
commit be85fbd77e
26 changed files with 2022 additions and 71 deletions
+92 -3
View File
@@ -3,6 +3,7 @@ package handler
import (
"context"
"encoding/json"
"fmt"
"net/http"
"time"
@@ -14,9 +15,9 @@ type MetricsService interface {
GetDashboardSummary(ctx context.Context) (interface{}, error)
}
// MetricsHandler handles HTTP requests for Prometheus-style metrics.
// In V2, returns JSON metrics (not Prometheus format).
// Prometheus format can be added in V3 when observability becomes a paid feature.
// MetricsHandler handles HTTP requests for metrics.
// Supports both JSON format (GET /api/v1/metrics) and Prometheus exposition format
// (GET /api/v1/metrics/prometheus) for integration with Prometheus, Grafana, Datadog, etc.
type MetricsHandler struct {
svc MetricsService
serverStarted time.Time
@@ -117,6 +118,94 @@ func (h MetricsHandler) GetMetrics(w http.ResponseWriter, r *http.Request) {
JSON(w, http.StatusOK, metricsResp)
}
// GetPrometheusMetrics returns metrics in Prometheus exposition format (text/plain).
// GET /api/v1/metrics/prometheus
// Compatible with Prometheus, Grafana Agent, Datadog Agent, Victoria Metrics, and any
// OpenMetrics-compatible scraper. Metric names follow Prometheus naming conventions
// (lowercase, snake_case, prefixed with certctl_).
func (h MetricsHandler) GetPrometheusMetrics(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
return
}
requestID := middleware.GetRequestID(r.Context())
summary, err := h.svc.GetDashboardSummary(r.Context())
if err != nil {
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to collect metrics", requestID)
return
}
// Extract fields from summary via JSON round-trip (avoids cross-package type assertion)
jsonBytes, err := json.Marshal(summary)
if err != nil {
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to marshal metrics data", requestID)
return
}
var dashboardSummary DashboardSummary
if err := json.Unmarshal(jsonBytes, &dashboardSummary); err != nil {
ErrorWithRequestID(w, http.StatusInternalServerError, "Invalid metrics data", requestID)
return
}
// Compute derived values
active := dashboardSummary.TotalCertificates - dashboardSummary.ExpiringCertificates - dashboardSummary.ExpiredCertificates - dashboardSummary.RevokedCertificates
uptimeSeconds := int64(time.Since(h.serverStarted).Seconds())
// Build Prometheus exposition format
// See: https://prometheus.io/docs/instrumenting/exposition_formats/
w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
w.WriteHeader(http.StatusOK)
// Gauges — point-in-time values
fmt.Fprintf(w, "# HELP certctl_certificate_total Total number of managed certificates.\n")
fmt.Fprintf(w, "# TYPE certctl_certificate_total gauge\n")
fmt.Fprintf(w, "certctl_certificate_total %d\n\n", dashboardSummary.TotalCertificates)
fmt.Fprintf(w, "# HELP certctl_certificate_active Number of active (non-expiring, non-expired, non-revoked) certificates.\n")
fmt.Fprintf(w, "# TYPE certctl_certificate_active gauge\n")
fmt.Fprintf(w, "certctl_certificate_active %d\n\n", active)
fmt.Fprintf(w, "# HELP certctl_certificate_expiring_soon Number of certificates expiring within 30 days.\n")
fmt.Fprintf(w, "# TYPE certctl_certificate_expiring_soon gauge\n")
fmt.Fprintf(w, "certctl_certificate_expiring_soon %d\n\n", dashboardSummary.ExpiringCertificates)
fmt.Fprintf(w, "# HELP certctl_certificate_expired Number of expired certificates.\n")
fmt.Fprintf(w, "# TYPE certctl_certificate_expired gauge\n")
fmt.Fprintf(w, "certctl_certificate_expired %d\n\n", dashboardSummary.ExpiredCertificates)
fmt.Fprintf(w, "# HELP certctl_certificate_revoked Number of revoked certificates.\n")
fmt.Fprintf(w, "# TYPE certctl_certificate_revoked gauge\n")
fmt.Fprintf(w, "certctl_certificate_revoked %d\n\n", dashboardSummary.RevokedCertificates)
fmt.Fprintf(w, "# HELP certctl_agent_total Total number of registered agents.\n")
fmt.Fprintf(w, "# TYPE certctl_agent_total gauge\n")
fmt.Fprintf(w, "certctl_agent_total %d\n\n", dashboardSummary.TotalAgents)
fmt.Fprintf(w, "# HELP certctl_agent_online Number of agents currently online.\n")
fmt.Fprintf(w, "# TYPE certctl_agent_online gauge\n")
fmt.Fprintf(w, "certctl_agent_online %d\n\n", dashboardSummary.ActiveAgents)
fmt.Fprintf(w, "# HELP certctl_job_pending Number of jobs currently pending.\n")
fmt.Fprintf(w, "# TYPE certctl_job_pending gauge\n")
fmt.Fprintf(w, "certctl_job_pending %d\n\n", dashboardSummary.PendingJobs)
// Counters — cumulative values
fmt.Fprintf(w, "# HELP certctl_job_completed_total Total number of completed jobs.\n")
fmt.Fprintf(w, "# TYPE certctl_job_completed_total counter\n")
fmt.Fprintf(w, "certctl_job_completed_total %d\n\n", dashboardSummary.CompleteJobs)
fmt.Fprintf(w, "# HELP certctl_job_failed_total Total number of failed jobs.\n")
fmt.Fprintf(w, "# TYPE certctl_job_failed_total counter\n")
fmt.Fprintf(w, "certctl_job_failed_total %d\n\n", dashboardSummary.FailedJobs)
// Info — server uptime
fmt.Fprintf(w, "# HELP certctl_uptime_seconds Server uptime in seconds.\n")
fmt.Fprintf(w, "# TYPE certctl_uptime_seconds gauge\n")
fmt.Fprintf(w, "certctl_uptime_seconds %d\n", uptimeSeconds)
}
// DashboardSummary mirrors the service.DashboardSummary for JSON unmarshaling.
// JSON tags must match the service-layer struct exactly.
type DashboardSummary struct {
+179
View File
@@ -0,0 +1,179 @@
package handler
import (
"context"
"encoding/json"
"fmt"
"net/http"
"github.com/shankar0123/certctl/internal/domain"
)
// NetworkScanService defines the interface used by the network scan handler.
type NetworkScanService interface {
ListTargets(ctx context.Context) ([]*domain.NetworkScanTarget, error)
GetTarget(ctx context.Context, id string) (*domain.NetworkScanTarget, error)
CreateTarget(ctx context.Context, target *domain.NetworkScanTarget) (*domain.NetworkScanTarget, error)
UpdateTarget(ctx context.Context, id string, target *domain.NetworkScanTarget) (*domain.NetworkScanTarget, error)
DeleteTarget(ctx context.Context, id string) error
TriggerScan(ctx context.Context, targetID string) (*domain.DiscoveryScan, error)
}
// NetworkScanHandler handles HTTP requests for network scan targets.
type NetworkScanHandler struct {
svc NetworkScanService
}
// NewNetworkScanHandler creates a new network scan handler.
func NewNetworkScanHandler(svc NetworkScanService) NetworkScanHandler {
return NetworkScanHandler{svc: svc}
}
// ListNetworkScanTargets handles GET /api/v1/network-scan-targets
func (h NetworkScanHandler) ListNetworkScanTargets(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
return
}
targets, err := h.svc.ListTargets(r.Context())
if err != nil {
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to list network scan targets: %v", err))
return
}
if targets == nil {
targets = []*domain.NetworkScanTarget{}
}
JSON(w, http.StatusOK, PagedResponse{
Data: targets,
Total: int64(len(targets)),
Page: 1,
PerPage: len(targets),
})
}
// GetNetworkScanTarget handles GET /api/v1/network-scan-targets/{id}
func (h NetworkScanHandler) GetNetworkScanTarget(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
return
}
id := r.PathValue("id")
if id == "" {
Error(w, http.StatusBadRequest, "network scan target ID is required")
return
}
target, err := h.svc.GetTarget(r.Context(), id)
if err != nil {
Error(w, http.StatusNotFound, fmt.Sprintf("network scan target not found: %v", err))
return
}
JSON(w, http.StatusOK, target)
}
// CreateNetworkScanTarget handles POST /api/v1/network-scan-targets
func (h NetworkScanHandler) CreateNetworkScanTarget(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPost {
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
return
}
var target domain.NetworkScanTarget
if err := json.NewDecoder(r.Body).Decode(&target); err != nil {
Error(w, http.StatusBadRequest, fmt.Sprintf("invalid request body: %v", err))
return
}
created, err := h.svc.CreateTarget(r.Context(), &target)
if err != nil {
Error(w, http.StatusBadRequest, fmt.Sprintf("failed to create network scan target: %v", err))
return
}
JSON(w, http.StatusCreated, created)
}
// UpdateNetworkScanTarget handles PUT /api/v1/network-scan-targets/{id}
func (h NetworkScanHandler) UpdateNetworkScanTarget(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPut {
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
return
}
id := r.PathValue("id")
if id == "" {
Error(w, http.StatusBadRequest, "network scan target ID is required")
return
}
var target domain.NetworkScanTarget
if err := json.NewDecoder(r.Body).Decode(&target); err != nil {
Error(w, http.StatusBadRequest, fmt.Sprintf("invalid request body: %v", err))
return
}
updated, err := h.svc.UpdateTarget(r.Context(), id, &target)
if err != nil {
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to update network scan target: %v", err))
return
}
JSON(w, http.StatusOK, updated)
}
// DeleteNetworkScanTarget handles DELETE /api/v1/network-scan-targets/{id}
func (h NetworkScanHandler) DeleteNetworkScanTarget(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodDelete {
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
return
}
id := r.PathValue("id")
if id == "" {
Error(w, http.StatusBadRequest, "network scan target ID is required")
return
}
if err := h.svc.DeleteTarget(r.Context(), id); err != nil {
Error(w, http.StatusNotFound, fmt.Sprintf("failed to delete network scan target: %v", err))
return
}
JSON(w, http.StatusNoContent, nil)
}
// TriggerNetworkScan handles POST /api/v1/network-scan-targets/{id}/scan
func (h NetworkScanHandler) TriggerNetworkScan(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPost {
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
return
}
id := r.PathValue("id")
if id == "" {
Error(w, http.StatusBadRequest, "network scan target ID is required")
return
}
scan, err := h.svc.TriggerScan(r.Context(), id)
if err != nil {
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to trigger scan: %v", err))
return
}
// scan may be nil if no certs found
if scan == nil {
JSON(w, http.StatusOK, map[string]string{
"status": "completed",
"message": "Scan completed, no certificates found",
})
return
}
JSON(w, http.StatusAccepted, scan)
}
@@ -0,0 +1,220 @@
package handler
import (
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"testing"
"github.com/shankar0123/certctl/internal/domain"
)
// mockNetworkScanService implements NetworkScanService for testing.
type mockNetworkScanService struct {
targets []*domain.NetworkScanTarget
}
func (m *mockNetworkScanService) ListTargets(ctx context.Context) ([]*domain.NetworkScanTarget, error) {
return m.targets, nil
}
func (m *mockNetworkScanService) GetTarget(ctx context.Context, id string) (*domain.NetworkScanTarget, error) {
for _, t := range m.targets {
if t.ID == id {
return t, nil
}
}
return nil, fmt.Errorf("not found: %s", id)
}
func (m *mockNetworkScanService) CreateTarget(ctx context.Context, target *domain.NetworkScanTarget) (*domain.NetworkScanTarget, error) {
if target.Name == "" {
return nil, fmt.Errorf("name is required")
}
target.ID = "nst-test-123"
m.targets = append(m.targets, target)
return target, nil
}
func (m *mockNetworkScanService) UpdateTarget(ctx context.Context, id string, target *domain.NetworkScanTarget) (*domain.NetworkScanTarget, error) {
for _, t := range m.targets {
if t.ID == id {
if target.Name != "" {
t.Name = target.Name
}
return t, nil
}
}
return nil, fmt.Errorf("not found: %s", id)
}
func (m *mockNetworkScanService) DeleteTarget(ctx context.Context, id string) error {
for i, t := range m.targets {
if t.ID == id {
m.targets = append(m.targets[:i], m.targets[i+1:]...)
return nil
}
}
return fmt.Errorf("not found: %s", id)
}
func (m *mockNetworkScanService) TriggerScan(ctx context.Context, targetID string) (*domain.DiscoveryScan, error) {
for _, t := range m.targets {
if t.ID == targetID {
return &domain.DiscoveryScan{
ID: "dscan-test",
AgentID: "server-scanner",
CertificatesFound: 3,
}, nil
}
}
return nil, fmt.Errorf("not found: %s", targetID)
}
func TestListNetworkScanTargets(t *testing.T) {
svc := &mockNetworkScanService{
targets: []*domain.NetworkScanTarget{
{ID: "nst-1", Name: "target1", CIDRs: []string{"10.0.0.0/24"}, Ports: []int{443}},
{ID: "nst-2", Name: "target2", CIDRs: []string{"192.168.0.0/16"}, Ports: []int{443, 8443}},
},
}
h := NewNetworkScanHandler(svc)
req := httptest.NewRequest(http.MethodGet, "/api/v1/network-scan-targets", nil)
w := httptest.NewRecorder()
h.ListNetworkScanTargets(w, req)
if w.Code != http.StatusOK {
t.Errorf("expected 200, got %d", w.Code)
}
var resp PagedResponse
json.NewDecoder(w.Body).Decode(&resp)
if resp.Total != 2 {
t.Errorf("expected total 2, got %d", resp.Total)
}
}
func TestListNetworkScanTargets_Empty(t *testing.T) {
svc := &mockNetworkScanService{}
h := NewNetworkScanHandler(svc)
req := httptest.NewRequest(http.MethodGet, "/api/v1/network-scan-targets", nil)
w := httptest.NewRecorder()
h.ListNetworkScanTargets(w, req)
if w.Code != http.StatusOK {
t.Errorf("expected 200, got %d", w.Code)
}
}
func TestCreateNetworkScanTarget(t *testing.T) {
svc := &mockNetworkScanService{}
h := NewNetworkScanHandler(svc)
body, _ := json.Marshal(map[string]interface{}{
"name": "Production",
"cidrs": []string{"10.0.0.0/24"},
"ports": []int{443},
})
req := httptest.NewRequest(http.MethodPost, "/api/v1/network-scan-targets", bytes.NewReader(body))
w := httptest.NewRecorder()
h.CreateNetworkScanTarget(w, req)
if w.Code != http.StatusCreated {
t.Errorf("expected 201, got %d: %s", w.Code, w.Body.String())
}
}
func TestCreateNetworkScanTarget_InvalidJSON(t *testing.T) {
svc := &mockNetworkScanService{}
h := NewNetworkScanHandler(svc)
req := httptest.NewRequest(http.MethodPost, "/api/v1/network-scan-targets", bytes.NewReader([]byte("not json")))
w := httptest.NewRecorder()
h.CreateNetworkScanTarget(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("expected 400, got %d", w.Code)
}
}
func TestCreateNetworkScanTarget_MissingName(t *testing.T) {
svc := &mockNetworkScanService{}
h := NewNetworkScanHandler(svc)
body, _ := json.Marshal(map[string]interface{}{
"cidrs": []string{"10.0.0.0/24"},
})
req := httptest.NewRequest(http.MethodPost, "/api/v1/network-scan-targets", bytes.NewReader(body))
w := httptest.NewRecorder()
h.CreateNetworkScanTarget(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("expected 400, got %d", w.Code)
}
}
func TestDeleteNetworkScanTarget_NotFound(t *testing.T) {
svc := &mockNetworkScanService{}
h := NewNetworkScanHandler(svc)
req := httptest.NewRequest(http.MethodDelete, "/api/v1/network-scan-targets/nst-nonexistent", nil)
req.SetPathValue("id", "nst-nonexistent")
w := httptest.NewRecorder()
h.DeleteNetworkScanTarget(w, req)
if w.Code != http.StatusNotFound {
t.Errorf("expected 404, got %d", w.Code)
}
}
func TestTriggerNetworkScan(t *testing.T) {
svc := &mockNetworkScanService{
targets: []*domain.NetworkScanTarget{
{ID: "nst-1", Name: "target1"},
},
}
h := NewNetworkScanHandler(svc)
req := httptest.NewRequest(http.MethodPost, "/api/v1/network-scan-targets/nst-1/scan", nil)
req.SetPathValue("id", "nst-1")
w := httptest.NewRecorder()
h.TriggerNetworkScan(w, req)
if w.Code != http.StatusAccepted {
t.Errorf("expected 202, got %d: %s", w.Code, w.Body.String())
}
}
func TestTriggerNetworkScan_NotFound(t *testing.T) {
svc := &mockNetworkScanService{}
h := NewNetworkScanHandler(svc)
req := httptest.NewRequest(http.MethodPost, "/api/v1/network-scan-targets/nst-nonexistent/scan", nil)
req.SetPathValue("id", "nst-nonexistent")
w := httptest.NewRecorder()
h.TriggerNetworkScan(w, req)
if w.Code != http.StatusInternalServerError {
t.Errorf("expected 500, got %d", w.Code)
}
}
func TestListNetworkScanTargets_MethodNotAllowed(t *testing.T) {
svc := &mockNetworkScanService{}
h := NewNetworkScanHandler(svc)
req := httptest.NewRequest(http.MethodPost, "/api/v1/network-scan-targets", nil)
w := httptest.NewRecorder()
h.ListNetworkScanTargets(w, req)
if w.Code != http.StatusMethodNotAllowed {
t.Errorf("expected 405, got %d", w.Code)
}
}
+114
View File
@@ -5,6 +5,7 @@ import (
"fmt"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
)
@@ -202,3 +203,116 @@ func TestGetMetrics_ServiceError(t *testing.T) {
t.Errorf("expected 500, got %d", w.Code)
}
}
// --- Prometheus metrics endpoint tests ---
func TestGetPrometheusMetrics_Success(t *testing.T) {
mock := &MockStatsService{
GetDashboardSummaryFn: func(ctx context.Context) (interface{}, error) {
return &DashboardSummary{
TotalCertificates: 25,
ExpiringCertificates: 3,
ExpiredCertificates: 2,
RevokedCertificates: 1,
ActiveAgents: 4,
TotalAgents: 6,
PendingJobs: 2,
FailedJobs: 1,
CompleteJobs: 15,
}, nil
},
}
h := NewMetricsHandler(mock, time.Now().Add(-1*time.Hour))
req := httptest.NewRequest(http.MethodGet, "/api/v1/metrics/prometheus", nil)
w := httptest.NewRecorder()
h.GetPrometheusMetrics(w, req)
if w.Code != http.StatusOK {
t.Errorf("expected 200, got %d", w.Code)
}
contentType := w.Header().Get("Content-Type")
if contentType != "text/plain; version=0.0.4; charset=utf-8" {
t.Errorf("expected Prometheus content type, got %q", contentType)
}
body := w.Body.String()
// Check metric lines are present
expected := []string{
"certctl_certificate_total 25",
"certctl_certificate_active 19",
"certctl_certificate_expiring_soon 3",
"certctl_certificate_expired 2",
"certctl_certificate_revoked 1",
"certctl_agent_total 6",
"certctl_agent_online 4",
"certctl_job_pending 2",
"certctl_job_completed_total 15",
"certctl_job_failed_total 1",
"# TYPE certctl_certificate_total gauge",
"# TYPE certctl_job_completed_total counter",
"# HELP certctl_uptime_seconds",
"# TYPE certctl_uptime_seconds gauge",
}
for _, exp := range expected {
if !containsLine(body, exp) {
t.Errorf("expected body to contain %q", exp)
}
}
}
func TestGetPrometheusMetrics_MethodNotAllowed(t *testing.T) {
mock := &MockStatsService{}
h := NewMetricsHandler(mock, time.Now())
req := httptest.NewRequest(http.MethodPost, "/api/v1/metrics/prometheus", nil)
w := httptest.NewRecorder()
h.GetPrometheusMetrics(w, req)
if w.Code != http.StatusMethodNotAllowed {
t.Errorf("expected 405, got %d", w.Code)
}
}
func TestGetPrometheusMetrics_ServiceError(t *testing.T) {
mock := &MockStatsService{
GetDashboardSummaryFn: func(ctx context.Context) (interface{}, error) {
return nil, fmt.Errorf("db error")
},
}
h := NewMetricsHandler(mock, time.Now())
req := httptest.NewRequest(http.MethodGet, "/api/v1/metrics/prometheus", nil)
w := httptest.NewRecorder()
h.GetPrometheusMetrics(w, req)
if w.Code != http.StatusInternalServerError {
t.Errorf("expected 500, got %d", w.Code)
}
}
func TestGetPrometheusMetrics_ZeroValues(t *testing.T) {
mock := &MockStatsService{
GetDashboardSummaryFn: func(ctx context.Context) (interface{}, error) {
return &DashboardSummary{}, nil
},
}
h := NewMetricsHandler(mock, time.Now())
req := httptest.NewRequest(http.MethodGet, "/api/v1/metrics/prometheus", nil)
w := httptest.NewRecorder()
h.GetPrometheusMetrics(w, req)
if w.Code != http.StatusOK {
t.Errorf("expected 200, got %d", w.Code)
}
body := w.Body.String()
if !containsLine(body, "certctl_certificate_total 0") {
t.Error("expected zero value for certificate_total")
}
if !containsLine(body, "certctl_job_pending 0") {
t.Error("expected zero value for job_pending")
}
}
// containsLine checks if the text contains the given substring.
func containsLine(text, substr string) bool {
return strings.Contains(text, substr)
}