feat: M14 — Observability (dashboard charts, agent fleet, stats API, metrics, structured logging, rollback)

Backend: StatsService with 5 aggregation methods, JSON metrics endpoint, slog-based
structured logging middleware. Stats API: dashboard summary, certificates-by-status,
expiration timeline, job trends, issuance rate. 23 new backend tests.

Frontend: Recharts-powered dashboard with 4 charts (status pie, expiration heatmap,
job trends line, issuance bar), agent fleet overview page with OS/arch grouping and
version breakdown, deployment rollback buttons on version history. 7 new frontend tests.

78 API endpoints, 744+ total tests (658 Go + 86 Vitest).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
shankar0123
2026-03-22 19:46:13 -04:00
parent 2f65dd1a61
commit ee75f149ae
21 changed files with 2125 additions and 28 deletions
+134
View File
@@ -0,0 +1,134 @@
package handler
import (
"context"
"encoding/json"
"net/http"
"time"
"github.com/shankar0123/certctl/internal/api/middleware"
)
// MetricsService is the single dependency MetricsHandler needs.
// GetDashboardSummary returns an opaque value; GetMetrics re-encodes it as JSON
// and decodes it into the handler-local DashboardSummary, so implementations
// must return something whose JSON form uses that struct's field names.
type MetricsService interface {
GetDashboardSummary(ctx context.Context) (interface{}, error)
}
// MetricsHandler serves the metrics endpoint.
// In V2 the response is plain JSON (not the Prometheus text format).
// Prometheus format can be added in V3 when observability becomes a paid feature.
type MetricsHandler struct {
// svc supplies the dashboard summary the metrics are derived from.
svc MetricsService
// serverStarted is the reference instant for uptime_seconds.
serverStarted time.Time
}
// NewMetricsHandler builds a MetricsHandler backed by svc.
// serverStarted is the instant uptime_seconds is measured from.
func NewMetricsHandler(svc MetricsService, serverStarted time.Time) MetricsHandler {
	var h MetricsHandler
	h.svc = svc
	h.serverStarted = serverStarted
	return h
}
// MetricsResponse is the top-level JSON body written by GetMetrics in V2.
// Values are grouped by kind: gauges, counters, and uptime.
type MetricsResponse struct {
Gauge MetricsGauge `json:"gauge"`
Counter MetricsCounter `json:"counter"`
Uptime UptimeMetric `json:"uptime"`
}
// MetricsGauge represents gauge metrics (point-in-time values).
// GetMetrics copies every field from the dashboard summary except
// CertificateActive, which it derives.
type MetricsGauge struct {
CertificateTotal int64 `json:"certificate_total"`
// CertificateActive is computed as total minus expiring, expired and revoked.
CertificateActive int64 `json:"certificate_active"`
CertificateExpiringSoon int64 `json:"certificate_expiring_soon"` // Within 30d
CertificateExpired int64 `json:"certificate_expired"`
CertificateRevoked int64 `json:"certificate_revoked"`
AgentTotal int64 `json:"agent_total"`
AgentOnline int64 `json:"agent_online"`
JobPending int64 `json:"job_pending"`
}
// MetricsCounter represents counter metrics (cumulative values).
// GetMetrics fills both fields from the dashboard summary's complete_jobs and
// failed_jobs values.
type MetricsCounter struct {
JobCompletedTotal int64 `json:"job_completed_total"`
JobFailedTotal int64 `json:"job_failed_total"`
}
// UptimeMetric represents server uptime information.
type UptimeMetric struct {
// UptimeSeconds is the time elapsed since ServerStarted, truncated to whole seconds.
UptimeSeconds int64 `json:"uptime_seconds"`
// ServerStarted is the start instant the handler was constructed with.
ServerStarted time.Time `json:"server_started"`
// MeasuredAt is the time at which the response was built.
MeasuredAt time.Time `json:"measured_at"`
}
// GetMetrics returns JSON metrics aggregated from the dashboard summary.
// GET /api/v1/metrics
//
// Non-GET requests receive 405. A failure to fetch the summary, or to convert
// it into the handler-local DashboardSummary, yields a 500 tagged with the
// request ID. On success a MetricsResponse is written with status 200.
func (h MetricsHandler) GetMetrics(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		Error(w, http.StatusMethodNotAllowed, "Method not allowed")
		return
	}

	ctx := r.Context()
	requestID := middleware.GetRequestID(ctx)

	summary, err := h.svc.GetDashboardSummary(ctx)
	if err != nil {
		ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to collect metrics", requestID)
		return
	}

	// The service returns an opaque value, so its fields are recovered through a
	// JSON round-trip (avoids a cross-package type assertion).
	jsonBytes, err := json.Marshal(summary)
	if err != nil {
		ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to marshal metrics data", requestID)
		return
	}

	var ds DashboardSummary
	if err := json.Unmarshal(jsonBytes, &ds); err != nil {
		ErrorWithRequestID(w, http.StatusInternalServerError, "Invalid metrics data", requestID)
		return
	}

	// "Active" is whatever remains after removing the expiring, expired and
	// revoked certificates from the total.
	active := ds.TotalCertificates - ds.ExpiringCertificates - ds.ExpiredCertificates - ds.RevokedCertificates

	// Read the clock once so that measured_at and uptime_seconds describe the
	// same instant.
	now := time.Now()

	JSON(w, http.StatusOK, MetricsResponse{
		Gauge: MetricsGauge{
			CertificateTotal:        ds.TotalCertificates,
			CertificateActive:       active,
			CertificateExpiringSoon: ds.ExpiringCertificates,
			CertificateExpired:      ds.ExpiredCertificates,
			CertificateRevoked:      ds.RevokedCertificates,
			AgentTotal:              ds.TotalAgents,
			AgentOnline:             ds.ActiveAgents,
			JobPending:              ds.PendingJobs,
		},
		Counter: MetricsCounter{
			JobCompletedTotal: ds.CompleteJobs,
			JobFailedTotal:    ds.FailedJobs,
		},
		Uptime: UptimeMetric{
			UptimeSeconds: int64(now.Sub(h.serverStarted).Seconds()),
			ServerStarted: h.serverStarted,
			MeasuredAt:    now,
		},
	})
}
// DashboardSummary mirrors the service.DashboardSummary for JSON unmarshaling.
// GetMetrics decodes the service's JSON-encoded summary into this type, so the
// JSON tags must match the service-layer struct exactly; a mismatched tag
// leaves the field at its zero value.
type DashboardSummary struct {
TotalCertificates int64 `json:"total_certificates"`
ExpiringCertificates int64 `json:"expiring_certificates"`
ExpiredCertificates int64 `json:"expired_certificates"`
RevokedCertificates int64 `json:"revoked_certificates"`
ActiveAgents int64 `json:"active_agents"`
OfflineAgents int64 `json:"offline_agents"`
TotalAgents int64 `json:"total_agents"`
PendingJobs int64 `json:"pending_jobs"`
FailedJobs int64 `json:"failed_jobs"`
CompleteJobs int64 `json:"complete_jobs"`
CompletedAt time.Time `json:"completed_at"`
}
+147
View File
@@ -0,0 +1,147 @@
package handler
import (
"context"
"net/http"
"strconv"
"github.com/shankar0123/certctl/internal/api/middleware"
)
// StatsService defines the service interface for statistics operations.
// Every method returns an opaque value that the handlers pass straight to the
// JSON response writer. The days argument of the windowed methods is the value
// the handler parsed from the "days" query parameter (30 when it is absent or
// invalid).
type StatsService interface {
GetDashboardSummary(ctx context.Context) (interface{}, error)
GetCertificatesByStatus(ctx context.Context) (interface{}, error)
GetExpirationTimeline(ctx context.Context, days int) (interface{}, error)
// GetJobStats backs the job-trends endpoint.
GetJobStats(ctx context.Context, days int) (interface{}, error)
GetIssuanceRate(ctx context.Context, days int) (interface{}, error)
}
// StatsHandler handles HTTP requests for statistics and observability endpoints.
// It holds no state beyond the service it delegates to.
type StatsHandler struct {
svc StatsService
}
// NewStatsHandler builds a StatsHandler that delegates to svc.
func NewStatsHandler(svc StatsService) StatsHandler {
	var h StatsHandler
	h.svc = svc
	return h
}
// GetDashboardSummary writes the service's high-level system summary as JSON.
// GET /api/v1/stats/summary
//
// Non-GET requests receive 405; a service error yields a 500 tagged with the
// request ID.
func (h StatsHandler) GetDashboardSummary(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		Error(w, http.StatusMethodNotAllowed, "Method not allowed")
		return
	}

	ctx := r.Context()
	reqID := middleware.GetRequestID(ctx)

	result, svcErr := h.svc.GetDashboardSummary(ctx)
	if svcErr == nil {
		JSON(w, http.StatusOK, result)
		return
	}
	ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to get dashboard summary", reqID)
}
// GetCertificatesByStatus writes the per-status certificate counts as JSON.
// GET /api/v1/stats/certificates-by-status
//
// Non-GET requests receive 405; a service error yields a 500 tagged with the
// request ID.
func (h StatsHandler) GetCertificatesByStatus(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		Error(w, http.StatusMethodNotAllowed, "Method not allowed")
		return
	}

	ctx := r.Context()
	reqID := middleware.GetRequestID(ctx)

	result, svcErr := h.svc.GetCertificatesByStatus(ctx)
	if svcErr == nil {
		JSON(w, http.StatusOK, result)
		return
	}
	ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to get certificate status counts", reqID)
}
// statsDaysParam extracts the optional "days" query parameter shared by the
// time-windowed stats endpoints. It returns 30 when the parameter is absent,
// is not an integer, or falls outside the accepted range 1..365.
func statsDaysParam(r *http.Request) int {
	const (
		defaultDays = 30
		maxDays     = 365
	)

	raw := r.URL.Query().Get("days")
	if raw == "" {
		return defaultDays
	}
	n, err := strconv.Atoi(raw)
	if err != nil || n <= 0 || n > maxDays {
		// Invalid values are deliberately ignored rather than rejected with a 400.
		return defaultDays
	}
	return n
}

// GetExpirationTimeline returns certificates expiring over the next N days.
// GET /api/v1/stats/expiration-timeline?days=30
//
// Non-GET requests receive 405; a service error yields a 500 tagged with the
// request ID.
func (h StatsHandler) GetExpirationTimeline(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		Error(w, http.StatusMethodNotAllowed, "Method not allowed")
		return
	}

	requestID := middleware.GetRequestID(r.Context())

	timeline, err := h.svc.GetExpirationTimeline(r.Context(), statsDaysParam(r))
	if err != nil {
		ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to get expiration timeline", requestID)
		return
	}

	JSON(w, http.StatusOK, timeline)
}

// GetJobTrends returns job success/failure trends over the past N days.
// GET /api/v1/stats/job-trends?days=30
//
// Non-GET requests receive 405; a service error yields a 500 tagged with the
// request ID.
func (h StatsHandler) GetJobTrends(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		Error(w, http.StatusMethodNotAllowed, "Method not allowed")
		return
	}

	requestID := middleware.GetRequestID(r.Context())

	trends, err := h.svc.GetJobStats(r.Context(), statsDaysParam(r))
	if err != nil {
		ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to get job trends", requestID)
		return
	}

	JSON(w, http.StatusOK, trends)
}

// GetIssuanceRate returns the rate of new certificate issuance over the past N days.
// GET /api/v1/stats/issuance-rate?days=30
//
// Non-GET requests receive 405; a service error yields a 500 tagged with the
// request ID.
func (h StatsHandler) GetIssuanceRate(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		Error(w, http.StatusMethodNotAllowed, "Method not allowed")
		return
	}

	requestID := middleware.GetRequestID(r.Context())

	issuanceRate, err := h.svc.GetIssuanceRate(r.Context(), statsDaysParam(r))
	if err != nil {
		ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to get issuance rate", requestID)
		return
	}

	JSON(w, http.StatusOK, issuanceRate)
}
+204
View File
@@ -0,0 +1,204 @@
package handler
import (
"context"
"fmt"
"net/http"
"net/http/httptest"
"testing"
"time"
)
// MockStatsService is a function-field test double that satisfies both
// StatsService and MetricsService. Each method forwards to its matching Fn
// field when one is set and otherwise returns a fixed empty result with a nil
// error.
type MockStatsService struct {
	GetDashboardSummaryFn     func(ctx context.Context) (interface{}, error)
	GetCertificatesByStatusFn func(ctx context.Context) (interface{}, error)
	GetExpirationTimelineFn   func(ctx context.Context, days int) (interface{}, error)
	GetJobStatsFn             func(ctx context.Context, days int) (interface{}, error)
	GetIssuanceRateFn         func(ctx context.Context, days int) (interface{}, error)
}

// GetDashboardSummary defaults to a map holding a zero total_certificates entry.
func (m *MockStatsService) GetDashboardSummary(ctx context.Context) (interface{}, error) {
	if fn := m.GetDashboardSummaryFn; fn != nil {
		return fn(ctx)
	}
	return map[string]int64{"total_certificates": 0}, nil
}

// GetCertificatesByStatus defaults to an empty, non-nil slice.
func (m *MockStatsService) GetCertificatesByStatus(ctx context.Context) (interface{}, error) {
	if fn := m.GetCertificatesByStatusFn; fn != nil {
		return fn(ctx)
	}
	return []interface{}{}, nil
}

// GetExpirationTimeline defaults to an empty, non-nil slice.
func (m *MockStatsService) GetExpirationTimeline(ctx context.Context, days int) (interface{}, error) {
	if fn := m.GetExpirationTimelineFn; fn != nil {
		return fn(ctx, days)
	}
	return []interface{}{}, nil
}

// GetJobStats defaults to an empty, non-nil slice.
func (m *MockStatsService) GetJobStats(ctx context.Context, days int) (interface{}, error) {
	if fn := m.GetJobStatsFn; fn != nil {
		return fn(ctx, days)
	}
	return []interface{}{}, nil
}

// GetIssuanceRate defaults to an empty, non-nil slice.
func (m *MockStatsService) GetIssuanceRate(ctx context.Context, days int) (interface{}, error) {
	if fn := m.GetIssuanceRateFn; fn != nil {
		return fn(ctx, days)
	}
	return []interface{}{}, nil
}
func TestGetDashboardSummary_Success(t *testing.T) {
mock := &MockStatsService{}
h := NewStatsHandler(mock)
req := httptest.NewRequest(http.MethodGet, "/api/v1/stats/summary", nil)
w := httptest.NewRecorder()
h.GetDashboardSummary(w, req)
if w.Code != http.StatusOK {
t.Errorf("expected 200, got %d", w.Code)
}
}
func TestGetDashboardSummary_MethodNotAllowed(t *testing.T) {
mock := &MockStatsService{}
h := NewStatsHandler(mock)
req := httptest.NewRequest(http.MethodPost, "/api/v1/stats/summary", nil)
w := httptest.NewRecorder()
h.GetDashboardSummary(w, req)
if w.Code != http.StatusMethodNotAllowed {
t.Errorf("expected 405, got %d", w.Code)
}
}
func TestGetDashboardSummary_ServiceError(t *testing.T) {
mock := &MockStatsService{
GetDashboardSummaryFn: func(ctx context.Context) (interface{}, error) {
return nil, fmt.Errorf("db error")
},
}
h := NewStatsHandler(mock)
req := httptest.NewRequest(http.MethodGet, "/api/v1/stats/summary", nil)
w := httptest.NewRecorder()
h.GetDashboardSummary(w, req)
if w.Code != http.StatusInternalServerError {
t.Errorf("expected 500, got %d", w.Code)
}
}
func TestGetCertificatesByStatus_Success(t *testing.T) {
mock := &MockStatsService{}
h := NewStatsHandler(mock)
req := httptest.NewRequest(http.MethodGet, "/api/v1/stats/certificates-by-status", nil)
w := httptest.NewRecorder()
h.GetCertificatesByStatus(w, req)
if w.Code != http.StatusOK {
t.Errorf("expected 200, got %d", w.Code)
}
}
// TestGetExpirationTimeline_Success checks that ?days=60 yields 200 and that
// the parsed value (not the default of 30) is what reaches the service.
func TestGetExpirationTimeline_Success(t *testing.T) {
	called := false
	gotDays := 0
	mock := &MockStatsService{
		GetExpirationTimelineFn: func(ctx context.Context, days int) (interface{}, error) {
			called = true
			gotDays = days
			return []interface{}{}, nil
		},
	}
	h := NewStatsHandler(mock)

	req := httptest.NewRequest(http.MethodGet, "/api/v1/stats/expiration-timeline?days=60", nil)
	w := httptest.NewRecorder()
	h.GetExpirationTimeline(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("expected 200, got %d", w.Code)
	}
	if !called {
		t.Fatal("expected the service to be called")
	}
	if gotDays != 60 {
		t.Errorf("expected days=60 to be forwarded, got %d", gotDays)
	}
}
func TestGetExpirationTimeline_DefaultDays(t *testing.T) {
mock := &MockStatsService{
GetExpirationTimelineFn: func(ctx context.Context, days int) (interface{}, error) {
if days != 30 {
t.Errorf("expected default 30 days, got %d", days)
}
return []interface{}{}, nil
},
}
h := NewStatsHandler(mock)
req := httptest.NewRequest(http.MethodGet, "/api/v1/stats/expiration-timeline", nil)
w := httptest.NewRecorder()
h.GetExpirationTimeline(w, req)
if w.Code != http.StatusOK {
t.Errorf("expected 200, got %d", w.Code)
}
}
// TestGetJobTrends_Success checks that ?days=14 yields 200 and that the parsed
// value (not the default of 30) is what reaches the service.
func TestGetJobTrends_Success(t *testing.T) {
	called := false
	gotDays := 0
	mock := &MockStatsService{
		GetJobStatsFn: func(ctx context.Context, days int) (interface{}, error) {
			called = true
			gotDays = days
			return []interface{}{}, nil
		},
	}
	h := NewStatsHandler(mock)

	req := httptest.NewRequest(http.MethodGet, "/api/v1/stats/job-trends?days=14", nil)
	w := httptest.NewRecorder()
	h.GetJobTrends(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("expected 200, got %d", w.Code)
	}
	if !called {
		t.Fatal("expected the service to be called")
	}
	if gotDays != 14 {
		t.Errorf("expected days=14 to be forwarded, got %d", gotDays)
	}
}
// TestGetIssuanceRate_Success checks that ?days=7 yields 200 and that the
// parsed value (not the default of 30) is what reaches the service.
func TestGetIssuanceRate_Success(t *testing.T) {
	called := false
	gotDays := 0
	mock := &MockStatsService{
		GetIssuanceRateFn: func(ctx context.Context, days int) (interface{}, error) {
			called = true
			gotDays = days
			return []interface{}{}, nil
		},
	}
	h := NewStatsHandler(mock)

	req := httptest.NewRequest(http.MethodGet, "/api/v1/stats/issuance-rate?days=7", nil)
	w := httptest.NewRecorder()
	h.GetIssuanceRate(w, req)

	if w.Code != http.StatusOK {
		t.Errorf("expected 200, got %d", w.Code)
	}
	if !called {
		t.Fatal("expected the service to be called")
	}
	if gotDays != 7 {
		t.Errorf("expected days=7 to be forwarded, got %d", gotDays)
	}
}
func TestGetMetrics_Success(t *testing.T) {
mock := &MockStatsService{
GetDashboardSummaryFn: func(ctx context.Context) (interface{}, error) {
return &DashboardSummary{
TotalCertificates: 10,
ExpiringCertificates: 2,
ExpiredCertificates: 1,
RevokedCertificates: 0,
ActiveAgents: 3,
TotalAgents: 5,
PendingJobs: 1,
FailedJobs: 0,
CompleteJobs: 8,
}, nil
},
}
h := NewMetricsHandler(mock, time.Now())
req := httptest.NewRequest(http.MethodGet, "/api/v1/metrics", nil)
w := httptest.NewRecorder()
h.GetMetrics(w, req)
if w.Code != http.StatusOK {
t.Errorf("expected 200, got %d", w.Code)
}
}
func TestGetMetrics_MethodNotAllowed(t *testing.T) {
mock := &MockStatsService{}
h := NewMetricsHandler(mock, time.Now())
req := httptest.NewRequest(http.MethodPost, "/api/v1/metrics", nil)
w := httptest.NewRecorder()
h.GetMetrics(w, req)
if w.Code != http.StatusMethodNotAllowed {
t.Errorf("expected 405, got %d", w.Code)
}
}
func TestGetMetrics_ServiceError(t *testing.T) {
mock := &MockStatsService{
GetDashboardSummaryFn: func(ctx context.Context) (interface{}, error) {
return nil, fmt.Errorf("db error")
},
}
h := NewMetricsHandler(mock, time.Now())
req := httptest.NewRequest(http.MethodGet, "/api/v1/metrics", nil)
w := httptest.NewRecorder()
h.GetMetrics(w, req)
if w.Code != http.StatusInternalServerError {
t.Errorf("expected 500, got %d", w.Code)
}
}
+29
View File
@@ -6,6 +6,7 @@ import (
"crypto/subtle"
"encoding/hex"
"log"
"log/slog"
"net/http"
"sync"
"time"
@@ -30,6 +31,7 @@ func RequestID(next http.Handler) http.Handler {
}
// Logging middleware logs request details including method, path, status, and duration.
// Deprecated: Use NewLogging for structured logging with slog.
func Logging(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
start := time.Now()
@@ -45,6 +47,33 @@ func Logging(next http.Handler) http.Handler {
})
}
// NewLogging returns middleware that emits one structured "request completed"
// record per request through logger.
// Each record carries request_id, method, path, status, duration_ms, and remote_addr.
func NewLogging(logger *slog.Logger) func(http.Handler) http.Handler {
	return func(next http.Handler) http.Handler {
		logged := func(w http.ResponseWriter, r *http.Request) {
			began := time.Now()

			// Hand the downstream handler a wrapper so the status code can be
			// read back afterwards; it starts out as 200.
			rec := &responseWriter{ResponseWriter: w, statusCode: http.StatusOK}
			next.ServeHTTP(rec, r)
			elapsed := time.Since(began)

			ctx := r.Context()
			logger.InfoContext(ctx, "request completed",
				slog.Any("request_id", getRequestID(ctx)),
				slog.String("method", r.Method),
				slog.String("path", r.URL.Path),
				slog.Any("status", rec.statusCode),
				slog.Int64("duration_ms", elapsed.Milliseconds()),
				slog.String("remote_addr", r.RemoteAddr),
			)
		}
		return http.HandlerFunc(logged)
	}
}
// Recovery middleware recovers from panics and returns a 500 error.
func Recovery(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+12
View File
@@ -57,6 +57,8 @@ func (r *Router) RegisterHandlers(
agentGroups handler.AgentGroupHandler,
audit handler.AuditHandler,
notifications handler.NotificationHandler,
stats handler.StatsHandler,
metrics handler.MetricsHandler,
health handler.HealthHandler,
) {
// Health endpoints (no auth middleware — must always be accessible)
@@ -174,6 +176,16 @@ func (r *Router) RegisterHandlers(
r.Register("GET /api/v1/notifications", http.HandlerFunc(notifications.ListNotifications))
r.Register("GET /api/v1/notifications/{id}", http.HandlerFunc(notifications.GetNotification))
r.Register("POST /api/v1/notifications/{id}/read", http.HandlerFunc(notifications.MarkAsRead))
// Stats routes: /api/v1/stats
r.Register("GET /api/v1/stats/summary", http.HandlerFunc(stats.GetDashboardSummary))
r.Register("GET /api/v1/stats/certificates-by-status", http.HandlerFunc(stats.GetCertificatesByStatus))
r.Register("GET /api/v1/stats/expiration-timeline", http.HandlerFunc(stats.GetExpirationTimeline))
r.Register("GET /api/v1/stats/job-trends", http.HandlerFunc(stats.GetJobTrends))
r.Register("GET /api/v1/stats/issuance-rate", http.HandlerFunc(stats.GetIssuanceRate))
// Metrics routes: /api/v1/metrics
r.Register("GET /api/v1/metrics", http.HandlerFunc(metrics.GetMetrics))
}
// GetMux returns the underlying http.ServeMux for direct access if needed.
+27
View File
@@ -75,6 +75,8 @@ func TestCertificateLifecycle(t *testing.T) {
agentGroupHandler := handler.NewAgentGroupHandler(&mockAgentGroupService{})
auditHandler := handler.NewAuditHandler(auditService)
notificationHandler := handler.NewNotificationHandler(notificationService)
statsHandler := handler.NewStatsHandler(&mockStatsService{})
metricsHandler := handler.NewMetricsHandler(&mockStatsService{}, time.Now())
healthHandler := handler.NewHealthHandler("none")
// Create router and register handlers
@@ -92,6 +94,8 @@ func TestCertificateLifecycle(t *testing.T) {
agentGroupHandler,
auditHandler,
notificationHandler,
statsHandler,
metricsHandler,
healthHandler,
)
@@ -1109,3 +1113,26 @@ func (m *mockRevocationRepository) MarkIssuerNotified(ctx context.Context, id st
}
return fmt.Errorf("revocation not found")
}
// mockStatsService is a stateless stub satisfying both handler.StatsService and
// handler.MetricsService for integration tests. Every method returns an empty,
// non-nil result and a nil error.
type mockStatsService struct{}

func (*mockStatsService) GetDashboardSummary(context.Context) (interface{}, error) {
	return new(handler.DashboardSummary), nil
}

func (*mockStatsService) GetCertificatesByStatus(context.Context) (interface{}, error) {
	return make(map[string]int64), nil
}

func (*mockStatsService) GetExpirationTimeline(context.Context, int) (interface{}, error) {
	return make([]interface{}, 0), nil
}

func (*mockStatsService) GetJobStats(context.Context, int) (interface{}, error) {
	return make([]interface{}, 0), nil
}

func (*mockStatsService) GetIssuanceRate(context.Context, int) (interface{}, error) {
	return make([]interface{}, 0), nil
}
+4
View File
@@ -69,6 +69,8 @@ func setupTestServer(t *testing.T) (*httptest.Server, *mockCertificateRepository
agentGroupHandler := handler.NewAgentGroupHandler(&mockAgentGroupService{})
auditHandler := handler.NewAuditHandler(auditService)
notificationHandler := handler.NewNotificationHandler(notificationService)
statsHandler := handler.NewStatsHandler(&mockStatsService{})
metricsHandler := handler.NewMetricsHandler(&mockStatsService{}, time.Now())
healthHandler := handler.NewHealthHandler("none")
r := router.New()
@@ -85,6 +87,8 @@ func setupTestServer(t *testing.T) (*httptest.Server, *mockCertificateRepository
agentGroupHandler,
auditHandler,
notificationHandler,
statsHandler,
metricsHandler,
healthHandler,
)
+312
View File
@@ -0,0 +1,312 @@
package service
import (
	"context"
	"fmt"
	"sort"
	"time"

	"github.com/shankar0123/certctl/internal/domain"
	"github.com/shankar0123/certctl/internal/repository"
)
// StatsService provides statistics and observability data for dashboards and monitoring.
// Its methods list records from the certificate, job and agent repositories
// and aggregate them in memory.
type StatsService struct {
certRepo repository.CertificateRepository
jobRepo repository.JobRepository
agentRepo repository.AgentRepository
}
// NewStatsService wires a StatsService to the three repositories it aggregates over.
func NewStatsService(
	certRepo repository.CertificateRepository,
	jobRepo repository.JobRepository,
	agentRepo repository.AgentRepository,
) *StatsService {
	svc := new(StatsService)
	svc.certRepo = certRepo
	svc.jobRepo = jobRepo
	svc.agentRepo = agentRepo
	return svc
}
// DashboardSummary represents a high-level summary of system state as
// produced by GetDashboardSummary.
type DashboardSummary struct {
// TotalCertificates is the total reported by the certificate repository.
TotalCertificates int64 `json:"total_certificates"`
// ExpiringCertificates counts non-revoked, non-expired certificates expiring within 30 days.
ExpiringCertificates int64 `json:"expiring_certificates"`
// ExpiredCertificates counts certificates with Expired status or an expiry in the past.
ExpiredCertificates int64 `json:"expired_certificates"`
RevokedCertificates int64 `json:"revoked_certificates"`
// ActiveAgents counts agents whose last heartbeat is less than 5 minutes old;
// all other agents are counted in OfflineAgents.
ActiveAgents int64 `json:"active_agents"`
OfflineAgents int64 `json:"offline_agents"`
TotalAgents int64 `json:"total_agents"`
// PendingJobs counts jobs in Pending, AwaitingCSR, AwaitingApproval or Running status.
PendingJobs int64 `json:"pending_jobs"`
FailedJobs int64 `json:"failed_jobs"`
CompleteJobs int64 `json:"complete_jobs"`
// CompletedAt is the time at which the summary was generated.
CompletedAt time.Time `json:"completed_at"`
}
// GetDashboardSummary returns a *DashboardSummary of key certificate, agent
// and job metrics.
//
// An error from any of the three repositories aborts the call and is returned
// wrapped; no partial summary is returned.
//
// NOTE: TotalCertificates is the repository's reported total, while the
// per-status counts only cover the certificates actually returned by the
// single page of up to 10000 entries requested here.
func (s *StatsService) GetDashboardSummary(ctx context.Context) (interface{}, error) {
	// One clock read drives both the summary timestamp and every time-based
	// classification below, so they all describe the same instant.
	now := time.Now()
	summary := &DashboardSummary{CompletedAt: now}

	allCerts, total, err := s.certRepo.List(ctx, &repository.CertificateFilter{Page: 1, PerPage: 10000})
	if err != nil {
		return nil, fmt.Errorf("failed to list certificates: %w", err)
	}
	summary.TotalCertificates = int64(total)

	expiringBefore := now.AddDate(0, 0, 30)
	for _, cert := range allCerts {
		hasExpiry := !cert.ExpiresAt.IsZero()
		switch {
		case cert.Status == domain.CertificateStatusRevoked:
			// Revocation wins over any expiry-based classification.
			summary.RevokedCertificates++
		case cert.Status == domain.CertificateStatusExpired || (hasExpiry && cert.ExpiresAt.Before(now)):
			summary.ExpiredCertificates++
		case hasExpiry && cert.ExpiresAt.Before(expiringBefore):
			// The previous case already excluded expiries before now, so an
			// expiry exactly equal to now is counted here rather than in no
			// bucket at all.
			summary.ExpiringCertificates++
		}
	}

	allAgents, err := s.agentRepo.List(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to list agents: %w", err)
	}
	summary.TotalAgents = int64(len(allAgents))

	// An agent is active when its last heartbeat is less than 5 minutes old;
	// agents that never sent one count as offline.
	heartbeatCutoff := now.Add(-5 * time.Minute)
	for _, agent := range allAgents {
		if agent.LastHeartbeatAt != nil && agent.LastHeartbeatAt.After(heartbeatCutoff) {
			summary.ActiveAgents++
		} else {
			summary.OfflineAgents++
		}
	}

	allJobs, err := s.jobRepo.List(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to list jobs: %w", err)
	}
	for _, job := range allJobs {
		switch job.Status {
		case domain.JobStatusPending, domain.JobStatusAwaitingCSR, domain.JobStatusAwaitingApproval, domain.JobStatusRunning:
			// Every not-yet-finished state is reported as pending.
			summary.PendingJobs++
		case domain.JobStatusFailed:
			summary.FailedJobs++
		case domain.JobStatusCompleted:
			summary.CompleteJobs++
		}
	}

	return summary, nil
}
// CertificateStatusCount represents the number of certificates in one status;
// GetCertificatesByStatus returns a slice of these.
type CertificateStatusCount struct {
Status string `json:"status"`
Count int64 `json:"count"`
}
// GetCertificatesByStatus returns a []CertificateStatusCount with one entry
// per status present, sorted by status name so repeated calls return the
// entries in the same order.
//
// Certificates whose stored status is empty or "Active" are reclassified from
// their expiry: "Expired" if it is in the past, "Expiring" if it is within 30
// days, otherwise "Active" (also used when no expiry is set). Any other stored
// status is counted as-is. Only the first page of up to 10000 certificates is
// examined.
func (s *StatsService) GetCertificatesByStatus(ctx context.Context) (interface{}, error) {
	allCerts, _, err := s.certRepo.List(ctx, &repository.CertificateFilter{Page: 1, PerPage: 10000})
	if err != nil {
		return nil, fmt.Errorf("failed to list certificates: %w", err)
	}

	counts := make(map[string]int64)
	now := time.Now()
	expiringBefore := now.AddDate(0, 0, 30)

	for _, cert := range allCerts {
		status := string(cert.Status)
		if status == "" || status == "Active" {
			switch {
			case cert.ExpiresAt.IsZero():
				status = "Active"
			case cert.ExpiresAt.Before(now):
				status = "Expired"
			case cert.ExpiresAt.Before(expiringBefore):
				status = "Expiring"
			default:
				status = "Active"
			}
		}
		counts[status]++
	}

	result := make([]CertificateStatusCount, 0, len(counts))
	for status, count := range counts {
		result = append(result, CertificateStatusCount{Status: status, Count: count})
	}
	// Map iteration order is unspecified; sort so the response is deterministic.
	sort.Slice(result, func(i, j int) bool { return result[i].Status < result[j].Status })

	return result, nil
}
// ExpirationBucket represents certificates expiring on a specific date.
// Date is formatted as YYYY-MM-DD.
type ExpirationBucket struct {
Date string `json:"date"`
Count int64 `json:"count"`
}
// GetExpirationTimeline returns a []ExpirationBucket with exactly one entry
// per day for the next N days, starting today, in chronological order. Days on
// which nothing expires are present with a zero count.
//
// A non-positive days value falls back to 30. Only certificates with a
// non-zero expiry strictly between now and now+days are counted. Only the
// first page of up to 10000 certificates is examined.
func (s *StatsService) GetExpirationTimeline(ctx context.Context, days int) (interface{}, error) {
	if days <= 0 {
		days = 30
	}

	allCerts, _, err := s.certRepo.List(ctx, &repository.CertificateFilter{Page: 1, PerPage: 10000})
	if err != nil {
		return nil, fmt.Errorf("failed to list certificates: %w", err)
	}

	const dayLayout = "2006-01-02"
	now := time.Now()
	loc := now.Location()
	endDate := now.AddDate(0, 0, days)

	buckets := make(map[string]int64, days)
	for _, cert := range allCerts {
		if cert.ExpiresAt.IsZero() {
			continue
		}
		if cert.ExpiresAt.After(now) && cert.ExpiresAt.Before(endDate) {
			// Format in the same location as the emitted dates below; a
			// timestamp carrying another zone could otherwise be keyed under a
			// neighbouring calendar day and miss its bucket.
			buckets[cert.ExpiresAt.In(loc).Format(dayLayout)]++
		}
	}

	result := make([]ExpirationBucket, 0, days)
	for i := 0; i < days; i++ {
		day := now.AddDate(0, 0, i).Format(dayLayout)
		// A missing key reads as zero, which is the count wanted for empty days.
		result = append(result, ExpirationBucket{Date: day, Count: buckets[day]})
	}

	return result, nil
}
// JobTrendDataPoint represents success/failure counts for a specific day.
// Date is formatted as YYYY-MM-DD.
type JobTrendDataPoint struct {
Date string `json:"date"`
CompletedCount int64 `json:"completed_count"`
FailedCount int64 `json:"failed_count"`
// SuccessRate is completed / (completed + failed) as a percentage (0-100);
// it stays 0 on days with no finished jobs.
SuccessRate float64 `json:"success_rate"`
}
// GetJobStats returns a []JobTrendDataPoint with exactly one entry per day for
// the past N days, ending today, in chronological order. Days without finished
// jobs are present with zero counts and a zero success rate.
//
// A non-positive days value falls back to 30. Only jobs in Completed or Failed
// status that have a completion time are considered.
func (s *StatsService) GetJobStats(ctx context.Context, days int) (interface{}, error) {
	if days <= 0 {
		days = 30
	}

	allJobs, err := s.jobRepo.List(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to list jobs: %w", err)
	}

	type dayData struct {
		completed int64
		failed    int64
	}

	const dayLayout = "2006-01-02"
	now := time.Now()
	loc := now.Location()
	// Computed once; it does not depend on the job being examined.
	cutoff := now.AddDate(0, 0, -days)

	buckets := make(map[string]*dayData, days)
	for _, job := range allJobs {
		if job.Status != domain.JobStatusCompleted && job.Status != domain.JobStatusFailed {
			continue
		}
		if job.CompletedAt == nil || job.CompletedAt.Before(cutoff) {
			continue
		}

		// Format in the same location as the emitted dates below; a timestamp
		// carrying another zone could otherwise be keyed under a neighbouring
		// calendar day and miss its bucket.
		day := job.CompletedAt.In(loc).Format(dayLayout)
		data, exists := buckets[day]
		if !exists {
			data = &dayData{}
			buckets[day] = data
		}
		if job.Status == domain.JobStatusCompleted {
			data.completed++
		} else {
			data.failed++
		}
	}

	result := make([]JobTrendDataPoint, 0, days)
	for i := 0; i < days; i++ {
		// Offsets run from -(days-1) to 0, so the last point is today.
		point := JobTrendDataPoint{Date: now.AddDate(0, 0, -days+i+1).Format(dayLayout)}
		if data, exists := buckets[point.Date]; exists {
			point.CompletedCount = data.completed
			point.FailedCount = data.failed
			if total := data.completed + data.failed; total > 0 {
				point.SuccessRate = (float64(data.completed) / float64(total)) * 100
			}
		}
		result = append(result, point)
	}

	return result, nil
}
// IssuanceRateDataPoint represents new certificates issued on a specific day.
// Date is formatted as YYYY-MM-DD; GetIssuanceRate counts a certificate under
// the day of its creation time.
type IssuanceRateDataPoint struct {
Date string `json:"date"`
IssuedCount int64 `json:"issued_count"`
}
// GetIssuanceRate returns a []IssuanceRateDataPoint with exactly one entry per
// day for the past N days, ending today, in chronological order. Days without
// new certificates are present with a zero count.
//
// A non-positive days value falls back to 30. A certificate is attributed to
// the day of its creation time; certificates without one are skipped. Only the
// first page of up to 10000 certificates is examined.
func (s *StatsService) GetIssuanceRate(ctx context.Context, days int) (interface{}, error) {
	if days <= 0 {
		days = 30
	}

	allCerts, _, err := s.certRepo.List(ctx, &repository.CertificateFilter{Page: 1, PerPage: 10000})
	if err != nil {
		return nil, fmt.Errorf("failed to list certificates: %w", err)
	}

	const dayLayout = "2006-01-02"
	now := time.Now()
	loc := now.Location()
	// Computed once; it does not depend on the certificate being examined.
	cutoff := now.AddDate(0, 0, -days)

	buckets := make(map[string]int64, days)
	for _, cert := range allCerts {
		if cert.CreatedAt.IsZero() || cert.CreatedAt.Before(cutoff) {
			continue
		}
		// Format in the same location as the emitted dates below; a timestamp
		// carrying another zone could otherwise be keyed under a neighbouring
		// calendar day and miss its bucket.
		buckets[cert.CreatedAt.In(loc).Format(dayLayout)]++
	}

	result := make([]IssuanceRateDataPoint, 0, days)
	for i := 0; i < days; i++ {
		// Offsets run from -(days-1) to 0, so the last point is today.
		day := now.AddDate(0, 0, -days+i+1).Format(dayLayout)
		// A missing key reads as zero, which is the count wanted for empty days.
		result = append(result, IssuanceRateDataPoint{Date: day, IssuedCount: buckets[day]})
	}

	return result, nil
}
+249
View File
@@ -0,0 +1,249 @@
package service
import (
"context"
"testing"
"time"
"github.com/shankar0123/certctl/internal/domain"
)
// newTestStatsService returns a StatsService wired to fresh in-memory mock
// repositories, together with those mocks so tests can seed them.
func newTestStatsService() (*StatsService, *mockCertRepo, *mockJobRepo, *mockAgentRepo) {
	certs := &mockCertRepo{Certs: make(map[string]*domain.ManagedCertificate)}
	jobs := newMockJobRepository()
	agents := newMockAgentRepository()
	return NewStatsService(certs, jobs, agents), certs, jobs, agents
}
// TestGetDashboardSummary_Empty checks that empty repositories produce a
// *DashboardSummary with zero certificate and agent totals.
func TestGetDashboardSummary_Empty(t *testing.T) {
	svc, _, _, _ := newTestStatsService()

	got, err := svc.GetDashboardSummary(context.Background())
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	summary, isSummary := got.(*DashboardSummary)
	if !isSummary {
		t.Fatal("expected *DashboardSummary")
	}
	if n := summary.TotalCertificates; n != 0 {
		t.Errorf("expected 0 total certs, got %d", n)
	}
	if n := summary.TotalAgents; n != 0 {
		t.Errorf("expected 0 total agents, got %d", n)
	}
}
// TestGetDashboardSummary_WithData seeds one certificate per category, three
// agents with different heartbeat ages and one job per outcome, then checks
// every counter of the resulting summary.
func TestGetDashboardSummary_WithData(t *testing.T) {
	svc, certRepo, jobRepo, agentRepo := newTestStatsService()
	now := time.Now()

	// Certificates: active (60d out), expiring (10d out), expired (5d ago), revoked.
	for _, cert := range []*domain.ManagedCertificate{
		{ID: "mc-active", Status: domain.CertificateStatusActive, ExpiresAt: now.AddDate(0, 0, 60)},
		{ID: "mc-expiring", Status: domain.CertificateStatusActive, ExpiresAt: now.AddDate(0, 0, 10)},
		{ID: "mc-expired", Status: domain.CertificateStatusExpired, ExpiresAt: now.AddDate(0, 0, -5)},
		{ID: "mc-revoked", Status: domain.CertificateStatusRevoked},
	} {
		certRepo.Certs[cert.ID] = cert
	}

	// Agents: one fresh heartbeat, one stale heartbeat, one that never reported.
	fresh := now.Add(-2 * time.Minute)
	stale := now.Add(-10 * time.Minute)
	agentRepo.AddAgent(&domain.Agent{ID: "a-1", LastHeartbeatAt: &fresh})
	agentRepo.AddAgent(&domain.Agent{ID: "a-2", LastHeartbeatAt: &stale})
	agentRepo.AddAgent(&domain.Agent{ID: "a-3"})

	// Jobs: one per counted outcome.
	jobRepo.AddJob(&domain.Job{ID: "j-1", Status: domain.JobStatusPending})
	jobRepo.AddJob(&domain.Job{ID: "j-2", Status: domain.JobStatusCompleted})
	jobRepo.AddJob(&domain.Job{ID: "j-3", Status: domain.JobStatusFailed})

	result, err := svc.GetDashboardSummary(context.Background())
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	summary := result.(*DashboardSummary)

	checks := []struct {
		format    string
		got, want int64
	}{
		{"expected 4 total certs, got %d", summary.TotalCertificates, 4},
		{"expected 1 expiring, got %d", summary.ExpiringCertificates, 1},
		{"expected 1 expired, got %d", summary.ExpiredCertificates, 1},
		{"expected 1 revoked, got %d", summary.RevokedCertificates, 1},
		{"expected 3 total agents, got %d", summary.TotalAgents, 3},
		{"expected 1 active agent, got %d", summary.ActiveAgents, 1},
		{"expected 2 offline agents, got %d", summary.OfflineAgents, 2},
		{"expected 1 pending job, got %d", summary.PendingJobs, 1},
		{"expected 1 complete job, got %d", summary.CompleteJobs, 1},
		{"expected 1 failed job, got %d", summary.FailedJobs, 1},
	}
	for _, c := range checks {
		if c.got != c.want {
			t.Errorf(c.format, c.got)
		}
	}
}
// TestGetDashboardSummary_CertRepoError checks that GetDashboardSummary
// reports an error once the certificate repository from
// newTestStatsService has its ListErr field set.
func TestGetDashboardSummary_CertRepoError(t *testing.T) {
	statsSvc, certs, _, _ := newTestStatsService()
	certs.ListErr = errNotFound

	if _, err := statsSvc.GetDashboardSummary(context.Background()); err == nil {
		t.Fatal("expected error")
	}
}
// TestGetCertificatesByStatus_Empty checks that GetCertificatesByStatus
// succeeds and yields zero status counts when the test adds no certificates
// to the service built by newTestStatsService.
func TestGetCertificatesByStatus_Empty(t *testing.T) {
	svc, _, _, _ := newTestStatsService()

	result, err := svc.GetCertificatesByStatus(context.Background())
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Comma-ok assertion: an unexpected result type fails this test with a
	// readable message instead of panicking and aborting the test binary.
	counts, ok := result.([]CertificateStatusCount)
	if !ok {
		t.Fatalf("expected []CertificateStatusCount, got %T", result)
	}
	if len(counts) != 0 {
		t.Errorf("expected 0 status counts, got %d", len(counts))
	}
}
// TestGetCertificatesByStatus_WithData seeds one active certificate (expiring
// 60 days from now) and one revoked certificate, then checks that
// GetCertificatesByStatus returns at least two status counts.
func TestGetCertificatesByStatus_WithData(t *testing.T) {
	svc, certRepo, _, _ := newTestStatsService()

	future := time.Now().AddDate(0, 0, 60)
	certRepo.Certs["mc-1"] = &domain.ManagedCertificate{ID: "mc-1", Status: domain.CertificateStatusActive, ExpiresAt: future}
	certRepo.Certs["mc-2"] = &domain.ManagedCertificate{ID: "mc-2", Status: domain.CertificateStatusRevoked}

	result, err := svc.GetCertificatesByStatus(context.Background())
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Comma-ok assertion: an unexpected result type fails this test with a
	// readable message instead of panicking and aborting the test binary.
	counts, ok := result.([]CertificateStatusCount)
	if !ok {
		t.Fatalf("expected []CertificateStatusCount, got %T", result)
	}
	if len(counts) < 2 {
		t.Errorf("expected at least 2 status counts, got %d", len(counts))
	}
}
// TestGetExpirationTimeline_Default seeds a certificate expiring 10 days from
// now, requests a 30-day timeline, and checks that 30 buckets come back with
// at least one of them reporting a non-zero count.
func TestGetExpirationTimeline_Default(t *testing.T) {
	svc, certRepo, _, _ := newTestStatsService()

	expiresIn10d := time.Now().AddDate(0, 0, 10)
	certRepo.Certs["mc-1"] = &domain.ManagedCertificate{ID: "mc-1", ExpiresAt: expiresIn10d}

	result, err := svc.GetExpirationTimeline(context.Background(), 30)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Comma-ok assertion: an unexpected result type fails this test with a
	// readable message instead of panicking and aborting the test binary.
	buckets, ok := result.([]ExpirationBucket)
	if !ok {
		t.Fatalf("expected []ExpirationBucket, got %T", result)
	}
	if len(buckets) != 30 {
		t.Errorf("expected 30 buckets, got %d", len(buckets))
	}

	// At least one bucket should have count > 0.
	hasNonZero := false
	for _, b := range buckets {
		if b.Count > 0 {
			hasNonZero = true
			break
		}
	}
	if !hasNonZero {
		t.Error("expected at least one non-zero bucket")
	}
}
// TestGetExpirationTimeline_InvalidDays passes a negative day count (-1) and
// checks that GetExpirationTimeline still succeeds and returns 30 buckets.
func TestGetExpirationTimeline_InvalidDays(t *testing.T) {
	svc, _, _, _ := newTestStatsService()

	result, err := svc.GetExpirationTimeline(context.Background(), -1)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Comma-ok assertion: an unexpected result type fails this test with a
	// readable message instead of panicking and aborting the test binary.
	buckets, ok := result.([]ExpirationBucket)
	if !ok {
		t.Fatalf("expected []ExpirationBucket, got %T", result)
	}
	if len(buckets) != 30 {
		t.Errorf("expected default 30 buckets for invalid days, got %d", len(buckets))
	}
}
// TestGetJobStats_Empty requests a 7-day job trend without adding any jobs
// and checks that GetJobStats returns 7 data points.
func TestGetJobStats_Empty(t *testing.T) {
	svc, _, _, _ := newTestStatsService()

	result, err := svc.GetJobStats(context.Background(), 7)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Comma-ok assertion: an unexpected result type fails this test with a
	// readable message instead of panicking and aborting the test binary.
	points, ok := result.([]JobTrendDataPoint)
	if !ok {
		t.Fatalf("expected []JobTrendDataPoint, got %T", result)
	}
	if len(points) != 7 {
		t.Errorf("expected 7 data points, got %d", len(points))
	}
}
// TestGetJobStats_WithData adds one completed and one failed job, requests a
// 7-day job trend, and checks that the last data point reports one completed
// job, one failed job, and a 50% success rate.
func TestGetJobStats_WithData(t *testing.T) {
	svc, _, jobRepo, _ := newTestStatsService()

	// Stamp the jobs with the current instant. An offset such as "one hour
	// ago" lands on the previous calendar day when the test runs shortly
	// after midnight, which would contradict the "today" assertions below.
	completedAt := time.Now()
	jobRepo.AddJob(&domain.Job{ID: "j-1", Status: domain.JobStatusCompleted, CompletedAt: &completedAt})
	jobRepo.AddJob(&domain.Job{ID: "j-2", Status: domain.JobStatusFailed, CompletedAt: &completedAt})

	result, err := svc.GetJobStats(context.Background(), 7)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Comma-ok assertion: an unexpected result type fails this test with a
	// readable message instead of panicking and aborting the test binary.
	points, ok := result.([]JobTrendDataPoint)
	if !ok {
		t.Fatalf("expected []JobTrendDataPoint, got %T", result)
	}
	// Guard the index below; an empty slice would otherwise panic.
	if len(points) == 0 {
		t.Fatal("expected at least one data point, got none")
	}

	// The last data point should have today's data.
	todayPoint := points[len(points)-1]
	if todayPoint.CompletedCount != 1 {
		t.Errorf("expected 1 completed today, got %d", todayPoint.CompletedCount)
	}
	if todayPoint.FailedCount != 1 {
		t.Errorf("expected 1 failed today, got %d", todayPoint.FailedCount)
	}
	if todayPoint.SuccessRate != 50.0 {
		t.Errorf("expected 50%% success rate, got %.1f%%", todayPoint.SuccessRate)
	}
}
// TestGetIssuanceRate_Empty requests a 7-day issuance rate without adding
// any certificates and checks that GetIssuanceRate returns 7 data points.
func TestGetIssuanceRate_Empty(t *testing.T) {
	svc, _, _, _ := newTestStatsService()

	result, err := svc.GetIssuanceRate(context.Background(), 7)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Comma-ok assertion: an unexpected result type fails this test with a
	// readable message instead of panicking and aborting the test binary.
	points, ok := result.([]IssuanceRateDataPoint)
	if !ok {
		t.Fatalf("expected []IssuanceRateDataPoint, got %T", result)
	}
	if len(points) != 7 {
		t.Errorf("expected 7 data points, got %d", len(points))
	}
}
// TestGetIssuanceRate_WithData seeds two certificates whose CreatedAt is the
// current time, requests a 7-day issuance rate, and checks that the last
// data point reports two issued certificates.
func TestGetIssuanceRate_WithData(t *testing.T) {
	svc, certRepo, _, _ := newTestStatsService()

	certRepo.Certs["mc-1"] = &domain.ManagedCertificate{ID: "mc-1", CreatedAt: time.Now()}
	certRepo.Certs["mc-2"] = &domain.ManagedCertificate{ID: "mc-2", CreatedAt: time.Now()}

	result, err := svc.GetIssuanceRate(context.Background(), 7)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Comma-ok assertion: an unexpected result type fails this test with a
	// readable message instead of panicking and aborting the test binary.
	points, ok := result.([]IssuanceRateDataPoint)
	if !ok {
		t.Fatalf("expected []IssuanceRateDataPoint, got %T", result)
	}
	// Guard the index below; an empty slice would otherwise panic.
	if len(points) == 0 {
		t.Fatal("expected at least one data point, got none")
	}

	todayPoint := points[len(points)-1]
	if todayPoint.IssuedCount != 2 {
		t.Errorf("expected 2 issued today, got %d", todayPoint.IssuedCount)
	}
}
// TestGetIssuanceRate_RepoError checks that GetIssuanceRate reports an error
// once the certificate repository from newTestStatsService has its ListErr
// field set.
func TestGetIssuanceRate_RepoError(t *testing.T) {
	statsSvc, certs, _, _ := newTestStatsService()
	certs.ListErr = errNotFound

	if _, err := statsSvc.GetIssuanceRate(context.Background(), 7); err == nil {
		t.Fatal("expected error")
	}
}