mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-10 01:39:00 +00:00
Implement M3: expiration threshold alerting with dedup and status transitions
- Add alert_thresholds_days JSONB column to renewal_policies (default [30,14,7,0])
- Add RenewalPolicy.AlertThresholdsDays field + EffectiveAlertThresholds() helper
- Add RenewalPolicyRepository interface + postgres implementation
- Rewrite CheckExpiringCertificates with per-policy threshold alerting
- Add SendThresholdAlert + HasThresholdNotification for deduplication via [threshold:N] tags
- Add Type and MessageLike filters to NotificationFilter + postgres query support
- Auto-transition certs to Expiring (>0 days) or Expired (<=0 days) status
- Record expiration_alert_sent audit events per threshold crossing
- Fix .gitignore: allow SQL migration files, scope server/agent build artifact rules
- Track previously untracked cmd/ and migrations/ directories
- Update docs (README, architecture, demo-advanced) for threshold alerting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -54,12 +54,26 @@ const (
|
||||
|
||||
// RenewalPolicy defines renewal parameters for a managed certificate.
//
// AlertThresholdsDays lists the days-before-expiry marks at which expiration
// alerts are raised. When it is empty, EffectiveAlertThresholds falls back to
// DefaultAlertThresholds.
//
// NOTE(review): RetryInterval is serialized as "retry_interval_seconds", while
// the postgres repository loads it from a retry_interval_minutes column —
// confirm the intended unit.
type RenewalPolicy struct {
	ID                  string    `json:"id"`
	Name                string    `json:"name"`
	RenewalWindowDays   int       `json:"renewal_window_days"`
	AutoRenew           bool      `json:"auto_renew"`
	MaxRetries          int       `json:"max_retries"`
	RetryInterval       int       `json:"retry_interval_seconds"`
	AlertThresholdsDays []int     `json:"alert_thresholds_days"`
	CreatedAt           time.Time `json:"created_at"`
	UpdatedAt           time.Time `json:"updated_at"`
}
|
||||
|
||||
// DefaultAlertThresholds reports the fallback alert schedule, in days before
// expiry, that applies when a policy configures none. Every call yields a new
// slice, so callers may modify the result freely.
func DefaultAlertThresholds() []int {
	defaults := [...]int{30, 14, 7, 0}
	return defaults[:]
}
|
||||
|
||||
// EffectiveAlertThresholds returns the configured thresholds or defaults if empty.
|
||||
func (p *RenewalPolicy) EffectiveAlertThresholds() []int {
|
||||
if len(p.AlertThresholdsDays) > 0 {
|
||||
return p.AlertThresholdsDays
|
||||
}
|
||||
return DefaultAlertThresholds()
|
||||
}
|
||||
|
||||
@@ -37,8 +37,10 @@ type AuditFilter struct {
|
||||
// NotificationFilter defines filtering criteria for notification queries.
type NotificationFilter struct {
	// CertificateID optionally restricts results to a single certificate.
	CertificateID string
	// Type optionally restricts results to one notification type
	// (e.g., "ExpirationWarning").
	Type string
	// Status is a delivery status, e.g., "pending", "sent", "failed".
	Status string
	// Channel is a delivery channel, e.g., "email", "slack", "webhook".
	Channel string
	// MessageLike is an optional LIKE pattern matched against the message
	// content; it exists to support threshold deduplication.
	MessageLike string
	// Page and PerPage carry the pagination request.
	Page    int
	PerPage int
}
|
||||
|
||||
@@ -97,6 +97,14 @@ type JobRepository interface {
|
||||
GetPendingJobs(ctx context.Context, jobType domain.JobType) ([]*domain.Job, error)
|
||||
}
|
||||
|
||||
// RenewalPolicyRepository defines operations for managing renewal policies.
|
||||
type RenewalPolicyRepository interface {
|
||||
// Get retrieves a renewal policy by ID.
|
||||
Get(ctx context.Context, id string) (*domain.RenewalPolicy, error)
|
||||
// List returns all renewal policies.
|
||||
List(ctx context.Context) ([]*domain.RenewalPolicy, error)
|
||||
}
|
||||
|
||||
// PolicyRepository defines operations for managing compliance policies and violations.
|
||||
type PolicyRepository interface {
|
||||
// ListRules returns all policy rules.
|
||||
|
||||
@@ -67,11 +67,21 @@ func (r *NotificationRepository) List(ctx context.Context, filter *repository.No
|
||||
args = append(args, filter.CertificateID)
|
||||
argCount++
|
||||
}
|
||||
if filter.Type != "" {
|
||||
whereConditions = append(whereConditions, fmt.Sprintf("type = $%d", argCount))
|
||||
args = append(args, filter.Type)
|
||||
argCount++
|
||||
}
|
||||
if filter.Status != "" {
|
||||
whereConditions = append(whereConditions, fmt.Sprintf("status = $%d", argCount))
|
||||
args = append(args, filter.Status)
|
||||
argCount++
|
||||
}
|
||||
if filter.MessageLike != "" {
|
||||
whereConditions = append(whereConditions, fmt.Sprintf("message LIKE $%d", argCount))
|
||||
args = append(args, filter.MessageLike)
|
||||
argCount++
|
||||
}
|
||||
if filter.Channel != "" {
|
||||
whereConditions = append(whereConditions, fmt.Sprintf("channel = $%d", argCount))
|
||||
args = append(args, filter.Channel)
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
package postgres
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
)
|
||||
|
||||
// RenewalPolicyRepository implements repository.RenewalPolicyRepository on top
// of a database/sql handle.
type RenewalPolicyRepository struct {
	// db is the handle Get and List run their queries through.
	db *sql.DB
}
|
||||
|
||||
// NewRenewalPolicyRepository creates a new RenewalPolicyRepository
|
||||
func NewRenewalPolicyRepository(db *sql.DB) *RenewalPolicyRepository {
|
||||
return &RenewalPolicyRepository{db: db}
|
||||
}
|
||||
|
||||
// Get retrieves a renewal policy by ID
|
||||
func (r *RenewalPolicyRepository) Get(ctx context.Context, id string) (*domain.RenewalPolicy, error) {
|
||||
var policy domain.RenewalPolicy
|
||||
var thresholdsJSON []byte
|
||||
|
||||
err := r.db.QueryRowContext(ctx, `
|
||||
SELECT id, name, renewal_window_days, auto_renew, max_retries,
|
||||
retry_interval_minutes, alert_thresholds_days, created_at, updated_at
|
||||
FROM renewal_policies
|
||||
WHERE id = $1
|
||||
`, id).Scan(&policy.ID, &policy.Name, &policy.RenewalWindowDays, &policy.AutoRenew,
|
||||
&policy.MaxRetries, &policy.RetryInterval, &thresholdsJSON,
|
||||
&policy.CreatedAt, &policy.UpdatedAt)
|
||||
|
||||
if err != nil {
|
||||
if err == sql.ErrNoRows {
|
||||
return nil, fmt.Errorf("renewal policy not found: %s", id)
|
||||
}
|
||||
return nil, fmt.Errorf("failed to query renewal policy: %w", err)
|
||||
}
|
||||
|
||||
// Parse alert thresholds from JSONB
|
||||
if len(thresholdsJSON) > 0 {
|
||||
if err := json.Unmarshal(thresholdsJSON, &policy.AlertThresholdsDays); err != nil {
|
||||
// Fall back to defaults if JSON is malformed
|
||||
policy.AlertThresholdsDays = domain.DefaultAlertThresholds()
|
||||
}
|
||||
}
|
||||
|
||||
return &policy, nil
|
||||
}
|
||||
|
||||
// List returns all renewal policies
|
||||
func (r *RenewalPolicyRepository) List(ctx context.Context) ([]*domain.RenewalPolicy, error) {
|
||||
rows, err := r.db.QueryContext(ctx, `
|
||||
SELECT id, name, renewal_window_days, auto_renew, max_retries,
|
||||
retry_interval_minutes, alert_thresholds_days, created_at, updated_at
|
||||
FROM renewal_policies
|
||||
ORDER BY name
|
||||
`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to query renewal policies: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var policies []*domain.RenewalPolicy
|
||||
for rows.Next() {
|
||||
var policy domain.RenewalPolicy
|
||||
var thresholdsJSON []byte
|
||||
|
||||
if err := rows.Scan(&policy.ID, &policy.Name, &policy.RenewalWindowDays, &policy.AutoRenew,
|
||||
&policy.MaxRetries, &policy.RetryInterval, &thresholdsJSON,
|
||||
&policy.CreatedAt, &policy.UpdatedAt); err != nil {
|
||||
return nil, fmt.Errorf("failed to scan renewal policy: %w", err)
|
||||
}
|
||||
|
||||
if len(thresholdsJSON) > 0 {
|
||||
if err := json.Unmarshal(thresholdsJSON, &policy.AlertThresholdsDays); err != nil {
|
||||
policy.AlertThresholdsDays = domain.DefaultAlertThresholds()
|
||||
}
|
||||
}
|
||||
|
||||
policies = append(policies, &policy)
|
||||
}
|
||||
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, fmt.Errorf("error iterating renewal policy rows: %w", err)
|
||||
}
|
||||
|
||||
return policies, nil
|
||||
}
|
||||
@@ -34,12 +34,26 @@ func NewNotificationService(
|
||||
}
|
||||
}
|
||||
|
||||
// SendExpirationWarning sends a certificate expiration warning.
|
||||
// SendExpirationWarning sends a certificate expiration warning for a specific threshold.
|
||||
func (s *NotificationService) SendExpirationWarning(ctx context.Context, cert *domain.ManagedCertificate, daysUntilExpiry int) error {
|
||||
body := fmt.Sprintf(
|
||||
"The certificate for %s will expire in %d days (%s).\n\nPlease schedule renewal.",
|
||||
cert.CommonName, daysUntilExpiry, cert.ExpiresAt.Format("2006-01-02"),
|
||||
)
|
||||
return s.SendThresholdAlert(ctx, cert, daysUntilExpiry, daysUntilExpiry)
|
||||
}
|
||||
|
||||
// SendThresholdAlert sends an expiration alert for a specific threshold (e.g., 30-day, 14-day, expired).
|
||||
// The threshold parameter indicates which configured threshold triggered the alert.
|
||||
func (s *NotificationService) SendThresholdAlert(ctx context.Context, cert *domain.ManagedCertificate, daysUntilExpiry int, threshold int) error {
|
||||
var body string
|
||||
if threshold <= 0 {
|
||||
body = fmt.Sprintf(
|
||||
"[EXPIRED] The certificate for %s has expired (%s).\n\nImmediate action required.\n\n[threshold:%d]",
|
||||
cert.CommonName, cert.ExpiresAt.Format("2006-01-02"), threshold,
|
||||
)
|
||||
} else {
|
||||
body = fmt.Sprintf(
|
||||
"The certificate for %s will expire in %d days (%s).\n\nPlease schedule renewal.\n\n[threshold:%d]",
|
||||
cert.CommonName, daysUntilExpiry, cert.ExpiresAt.Format("2006-01-02"), threshold,
|
||||
)
|
||||
}
|
||||
|
||||
// Create notification record
|
||||
notif := &domain.NotificationEvent{
|
||||
@@ -61,6 +75,24 @@ func (s *NotificationService) SendExpirationWarning(ctx context.Context, cert *d
|
||||
return s.sendNotification(ctx, notif)
|
||||
}
|
||||
|
||||
// HasThresholdNotification checks whether an expiration warning has already been sent
|
||||
// for a specific certificate and threshold combination. Used for deduplication.
|
||||
func (s *NotificationService) HasThresholdNotification(ctx context.Context, certID string, threshold int) (bool, error) {
|
||||
filter := &repository.NotificationFilter{
|
||||
CertificateID: certID,
|
||||
Type: string(domain.NotificationTypeExpirationWarning),
|
||||
MessageLike: fmt.Sprintf("%%[threshold:%d]%%", threshold),
|
||||
PerPage: 1,
|
||||
}
|
||||
|
||||
existing, err := s.notifRepo.List(ctx, filter)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to check existing notifications: %w", err)
|
||||
}
|
||||
|
||||
return len(existing) > 0, nil
|
||||
}
|
||||
|
||||
// SendRenewalNotification sends a renewal success or failure notification.
|
||||
func (s *NotificationService) SendRenewalNotification(ctx context.Context, cert *domain.ManagedCertificate, success bool, err error) error {
|
||||
var body string
|
||||
|
||||
+111
-16
@@ -18,11 +18,12 @@ import (
|
||||
|
||||
// RenewalService manages certificate renewal workflows.
|
||||
type RenewalService struct {
|
||||
certRepo repository.CertificateRepository
|
||||
jobRepo repository.JobRepository
|
||||
auditService *AuditService
|
||||
notificationSvc *NotificationService
|
||||
issuerRegistry map[string]IssuerConnector
|
||||
certRepo repository.CertificateRepository
|
||||
jobRepo repository.JobRepository
|
||||
renewalPolicyRepo repository.RenewalPolicyRepository
|
||||
auditService *AuditService
|
||||
notificationSvc *NotificationService
|
||||
issuerRegistry map[string]IssuerConnector
|
||||
}
|
||||
|
||||
// IssuerConnector defines the service-layer interface for interacting with certificate issuers.
|
||||
@@ -48,29 +49,37 @@ type IssuanceResult struct {
|
||||
func NewRenewalService(
|
||||
certRepo repository.CertificateRepository,
|
||||
jobRepo repository.JobRepository,
|
||||
renewalPolicyRepo repository.RenewalPolicyRepository,
|
||||
auditService *AuditService,
|
||||
notificationSvc *NotificationService,
|
||||
issuerRegistry map[string]IssuerConnector,
|
||||
) *RenewalService {
|
||||
return &RenewalService{
|
||||
certRepo: certRepo,
|
||||
jobRepo: jobRepo,
|
||||
auditService: auditService,
|
||||
notificationSvc: notificationSvc,
|
||||
issuerRegistry: issuerRegistry,
|
||||
certRepo: certRepo,
|
||||
jobRepo: jobRepo,
|
||||
renewalPolicyRepo: renewalPolicyRepo,
|
||||
auditService: auditService,
|
||||
notificationSvc: notificationSvc,
|
||||
issuerRegistry: issuerRegistry,
|
||||
}
|
||||
}
|
||||
|
||||
// CheckExpiringCertificates identifies certificates needing renewal based on policy windows.
|
||||
// CheckExpiringCertificates identifies certificates needing renewal and sends threshold-based
|
||||
// expiration alerts. For each certificate, it looks up the renewal policy's configured alert
|
||||
// thresholds (default: 30, 14, 7, 0 days) and sends deduplicated notifications at each threshold.
|
||||
// Certificates are also transitioned to Expiring/Expired status as appropriate.
|
||||
func (s *RenewalService) CheckExpiringCertificates(ctx context.Context) error {
|
||||
// Default renewal window: 30 days before expiry
|
||||
renewalWindow := time.Now().AddDate(0, 0, 30)
|
||||
// Use the maximum possible threshold window (30 days) plus buffer for query
|
||||
renewalWindow := time.Now().AddDate(0, 0, 31)
|
||||
|
||||
expiring, err := s.certRepo.GetExpiringCertificates(ctx, renewalWindow)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to fetch expiring certificates: %w", err)
|
||||
}
|
||||
|
||||
// Cache renewal policies to avoid repeated lookups
|
||||
policyCache := make(map[string]*domain.RenewalPolicy)
|
||||
|
||||
for _, cert := range expiring {
|
||||
// Skip if already renewing or archived
|
||||
if cert.Status == domain.CertificateStatusRenewalInProgress || cert.Status == domain.CertificateStatusArchived {
|
||||
@@ -80,11 +89,31 @@ func (s *RenewalService) CheckExpiringCertificates(ctx context.Context) error {
|
||||
// Calculate days until expiry
|
||||
daysUntil := time.Until(cert.ExpiresAt).Hours() / 24
|
||||
|
||||
// Send expiration warning notification (always, regardless of issuer availability)
|
||||
if err := s.notificationSvc.SendExpirationWarning(ctx, cert, int(daysUntil)); err != nil {
|
||||
fmt.Printf("failed to send expiration warning for cert %s: %v\n", cert.ID, err)
|
||||
// Look up renewal policy for alert thresholds
|
||||
thresholds := domain.DefaultAlertThresholds()
|
||||
if cert.RenewalPolicyID != "" {
|
||||
policy, ok := policyCache[cert.RenewalPolicyID]
|
||||
if !ok {
|
||||
policy, err = s.renewalPolicyRepo.Get(ctx, cert.RenewalPolicyID)
|
||||
if err != nil {
|
||||
// Log but continue with defaults
|
||||
fmt.Printf("failed to fetch renewal policy %s for cert %s, using defaults: %v\n",
|
||||
cert.RenewalPolicyID, cert.ID, err)
|
||||
} else {
|
||||
policyCache[cert.RenewalPolicyID] = policy
|
||||
}
|
||||
}
|
||||
if policy != nil {
|
||||
thresholds = policy.EffectiveAlertThresholds()
|
||||
}
|
||||
}
|
||||
|
||||
// Update certificate status based on expiry
|
||||
s.updateCertExpiryStatus(ctx, cert, daysUntil)
|
||||
|
||||
// Send threshold-based alerts with deduplication
|
||||
s.sendThresholdAlerts(ctx, cert, int(daysUntil), thresholds)
|
||||
|
||||
// Only create renewal job if an issuer connector is registered for this cert's issuer
|
||||
if _, hasIssuer := s.issuerRegistry[cert.IssuerID]; !hasIssuer {
|
||||
continue
|
||||
@@ -137,6 +166,72 @@ func (s *RenewalService) CheckExpiringCertificates(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// sendThresholdAlerts sends deduplicated expiration notifications based on configured thresholds.
|
||||
// For each threshold that the certificate has crossed (e.g., ≤30 days, ≤14 days), it checks
|
||||
// whether a notification for that threshold was already sent. Only new threshold crossings
|
||||
// trigger notifications.
|
||||
func (s *RenewalService) sendThresholdAlerts(ctx context.Context, cert *domain.ManagedCertificate, daysUntil int, thresholds []int) {
|
||||
for _, threshold := range thresholds {
|
||||
// Only alert if the cert has crossed this threshold (days remaining ≤ threshold)
|
||||
if daysUntil > threshold {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if we already sent a notification for this threshold (deduplication)
|
||||
alreadySent, err := s.notificationSvc.HasThresholdNotification(ctx, cert.ID, threshold)
|
||||
if err != nil {
|
||||
fmt.Printf("failed to check notification dedup for cert %s threshold %d: %v\n",
|
||||
cert.ID, threshold, err)
|
||||
continue
|
||||
}
|
||||
if alreadySent {
|
||||
continue
|
||||
}
|
||||
|
||||
// Send the threshold alert
|
||||
if err := s.notificationSvc.SendThresholdAlert(ctx, cert, daysUntil, threshold); err != nil {
|
||||
fmt.Printf("failed to send threshold alert for cert %s at %d days: %v\n",
|
||||
cert.ID, threshold, err)
|
||||
}
|
||||
|
||||
// Record audit event for the alert
|
||||
_ = s.auditService.RecordEvent(ctx, "system", domain.ActorTypeSystem,
|
||||
"expiration_alert_sent", "certificate", cert.ID,
|
||||
map[string]interface{}{
|
||||
"threshold_days": threshold,
|
||||
"days_until_expiry": daysUntil,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// updateCertExpiryStatus transitions a certificate to Expiring or Expired status based on
|
||||
// how many days remain before expiry. Expired = 0 or fewer days, Expiring = within 30 days.
|
||||
func (s *RenewalService) updateCertExpiryStatus(ctx context.Context, cert *domain.ManagedCertificate, daysUntil float64) {
|
||||
var newStatus domain.CertificateStatus
|
||||
|
||||
if daysUntil <= 0 {
|
||||
newStatus = domain.CertificateStatusExpired
|
||||
} else {
|
||||
newStatus = domain.CertificateStatusExpiring
|
||||
}
|
||||
|
||||
// Only update if status is changing and cert isn't already in a terminal/active renewal state
|
||||
if cert.Status == newStatus {
|
||||
return
|
||||
}
|
||||
if cert.Status == domain.CertificateStatusRenewalInProgress ||
|
||||
cert.Status == domain.CertificateStatusArchived ||
|
||||
cert.Status == domain.CertificateStatusRevoked {
|
||||
return
|
||||
}
|
||||
|
||||
cert.Status = newStatus
|
||||
cert.UpdatedAt = time.Now()
|
||||
if err := s.certRepo.Update(ctx, cert); err != nil {
|
||||
fmt.Printf("failed to update cert %s status to %s: %v\n", cert.ID, newStatus, err)
|
||||
}
|
||||
}
|
||||
|
||||
// ProcessRenewalJob executes a renewal job: generate CSR, call issuer, store new version,
|
||||
// update cert status, and create deployment jobs for targets.
|
||||
//
|
||||
|
||||
Reference in New Issue
Block a user