Implement M3: expiration threshold alerting with dedup and status transitions

- Add alert_thresholds_days JSONB column to renewal_policies (default [30,14,7,0])
- Add RenewalPolicy.AlertThresholdsDays field + EffectiveAlertThresholds() helper
- Add RenewalPolicyRepository interface + postgres implementation
- Rewrite CheckExpiringCertificates with per-policy threshold alerting
- Add SendThresholdAlert + HasThresholdNotification for deduplication via [threshold:N] tags
- Add Type and MessageLike filters to NotificationFilter + postgres query support
- Auto-transition certs to Expiring (>0 days) or Expired (<=0 days) status
- Record expiration_alert_sent audit events per threshold crossing
- Fix .gitignore: allow SQL migration files, scope server/agent build artifact rules
- Track previously untracked cmd/ and migrations/ directories
- Update docs (README, architecture, demo-advanced) for threshold alerting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
shankar0123
2026-03-15 00:03:43 -04:00
parent ae67b10708
commit 1d1b89c9b5
17 changed files with 1485 additions and 37 deletions
+37 -5
View File
@@ -34,12 +34,26 @@ func NewNotificationService(
}
}
// SendExpirationWarning sends a certificate expiration warning.
// SendExpirationWarning sends a certificate expiration warning for a specific threshold.
func (s *NotificationService) SendExpirationWarning(ctx context.Context, cert *domain.ManagedCertificate, daysUntilExpiry int) error {
body := fmt.Sprintf(
"The certificate for %s will expire in %d days (%s).\n\nPlease schedule renewal.",
cert.CommonName, daysUntilExpiry, cert.ExpiresAt.Format("2006-01-02"),
)
return s.SendThresholdAlert(ctx, cert, daysUntilExpiry, daysUntilExpiry)
}
// SendThresholdAlert sends an expiration alert for a specific threshold (e.g., 30-day, 14-day, expired).
// The threshold parameter indicates which configured threshold triggered the alert.
func (s *NotificationService) SendThresholdAlert(ctx context.Context, cert *domain.ManagedCertificate, daysUntilExpiry int, threshold int) error {
var body string
if threshold <= 0 {
body = fmt.Sprintf(
"[EXPIRED] The certificate for %s has expired (%s).\n\nImmediate action required.\n\n[threshold:%d]",
cert.CommonName, cert.ExpiresAt.Format("2006-01-02"), threshold,
)
} else {
body = fmt.Sprintf(
"The certificate for %s will expire in %d days (%s).\n\nPlease schedule renewal.\n\n[threshold:%d]",
cert.CommonName, daysUntilExpiry, cert.ExpiresAt.Format("2006-01-02"), threshold,
)
}
// Create notification record
notif := &domain.NotificationEvent{
@@ -61,6 +75,24 @@ func (s *NotificationService) SendExpirationWarning(ctx context.Context, cert *d
return s.sendNotification(ctx, notif)
}
// HasThresholdNotification checks whether an expiration warning has already been sent
// for a specific certificate and threshold combination. Used for deduplication.
func (s *NotificationService) HasThresholdNotification(ctx context.Context, certID string, threshold int) (bool, error) {
filter := &repository.NotificationFilter{
CertificateID: certID,
Type: string(domain.NotificationTypeExpirationWarning),
MessageLike: fmt.Sprintf("%%[threshold:%d]%%", threshold),
PerPage: 1,
}
existing, err := s.notifRepo.List(ctx, filter)
if err != nil {
return false, fmt.Errorf("failed to check existing notifications: %w", err)
}
return len(existing) > 0, nil
}
// SendRenewalNotification sends a renewal success or failure notification.
func (s *NotificationService) SendRenewalNotification(ctx context.Context, cert *domain.ManagedCertificate, success bool, err error) error {
var body string
+111 -16
View File
@@ -18,11 +18,12 @@ import (
// RenewalService manages certificate renewal workflows.
type RenewalService struct {
certRepo repository.CertificateRepository
jobRepo repository.JobRepository
auditService *AuditService
notificationSvc *NotificationService
issuerRegistry map[string]IssuerConnector
certRepo repository.CertificateRepository
jobRepo repository.JobRepository
renewalPolicyRepo repository.RenewalPolicyRepository
auditService *AuditService
notificationSvc *NotificationService
issuerRegistry map[string]IssuerConnector
}
// IssuerConnector defines the service-layer interface for interacting with certificate issuers.
@@ -48,29 +49,37 @@ type IssuanceResult struct {
func NewRenewalService(
certRepo repository.CertificateRepository,
jobRepo repository.JobRepository,
renewalPolicyRepo repository.RenewalPolicyRepository,
auditService *AuditService,
notificationSvc *NotificationService,
issuerRegistry map[string]IssuerConnector,
) *RenewalService {
return &RenewalService{
certRepo: certRepo,
jobRepo: jobRepo,
auditService: auditService,
notificationSvc: notificationSvc,
issuerRegistry: issuerRegistry,
certRepo: certRepo,
jobRepo: jobRepo,
renewalPolicyRepo: renewalPolicyRepo,
auditService: auditService,
notificationSvc: notificationSvc,
issuerRegistry: issuerRegistry,
}
}
// CheckExpiringCertificates identifies certificates needing renewal based on policy windows.
// CheckExpiringCertificates identifies certificates needing renewal and sends threshold-based
// expiration alerts. For each certificate, it looks up the renewal policy's configured alert
// thresholds (default: 30, 14, 7, 0 days) and sends deduplicated notifications at each threshold.
// Certificates are also transitioned to Expiring/Expired status as appropriate.
func (s *RenewalService) CheckExpiringCertificates(ctx context.Context) error {
// Default renewal window: 30 days before expiry
renewalWindow := time.Now().AddDate(0, 0, 30)
// Use the maximum possible threshold window (30 days) plus buffer for query
renewalWindow := time.Now().AddDate(0, 0, 31)
expiring, err := s.certRepo.GetExpiringCertificates(ctx, renewalWindow)
if err != nil {
return fmt.Errorf("failed to fetch expiring certificates: %w", err)
}
// Cache renewal policies to avoid repeated lookups
policyCache := make(map[string]*domain.RenewalPolicy)
for _, cert := range expiring {
// Skip if already renewing or archived
if cert.Status == domain.CertificateStatusRenewalInProgress || cert.Status == domain.CertificateStatusArchived {
@@ -80,11 +89,31 @@ func (s *RenewalService) CheckExpiringCertificates(ctx context.Context) error {
// Calculate days until expiry
daysUntil := time.Until(cert.ExpiresAt).Hours() / 24
// Send expiration warning notification (always, regardless of issuer availability)
if err := s.notificationSvc.SendExpirationWarning(ctx, cert, int(daysUntil)); err != nil {
fmt.Printf("failed to send expiration warning for cert %s: %v\n", cert.ID, err)
// Look up renewal policy for alert thresholds
thresholds := domain.DefaultAlertThresholds()
if cert.RenewalPolicyID != "" {
policy, ok := policyCache[cert.RenewalPolicyID]
if !ok {
policy, err = s.renewalPolicyRepo.Get(ctx, cert.RenewalPolicyID)
if err != nil {
// Log but continue with defaults
fmt.Printf("failed to fetch renewal policy %s for cert %s, using defaults: %v\n",
cert.RenewalPolicyID, cert.ID, err)
} else {
policyCache[cert.RenewalPolicyID] = policy
}
}
if policy != nil {
thresholds = policy.EffectiveAlertThresholds()
}
}
// Update certificate status based on expiry
s.updateCertExpiryStatus(ctx, cert, daysUntil)
// Send threshold-based alerts with deduplication
s.sendThresholdAlerts(ctx, cert, int(daysUntil), thresholds)
// Only create renewal job if an issuer connector is registered for this cert's issuer
if _, hasIssuer := s.issuerRegistry[cert.IssuerID]; !hasIssuer {
continue
@@ -137,6 +166,72 @@ func (s *RenewalService) CheckExpiringCertificates(ctx context.Context) error {
return nil
}
// sendThresholdAlerts sends deduplicated expiration notifications based on configured thresholds.
// For each threshold that the certificate has crossed (e.g., ≤30 days, ≤14 days), it checks
// whether a notification for that threshold was already sent. Only new threshold crossings
// trigger notifications.
func (s *RenewalService) sendThresholdAlerts(ctx context.Context, cert *domain.ManagedCertificate, daysUntil int, thresholds []int) {
for _, threshold := range thresholds {
// Only alert if the cert has crossed this threshold (days remaining ≤ threshold)
if daysUntil > threshold {
continue
}
// Check if we already sent a notification for this threshold (deduplication)
alreadySent, err := s.notificationSvc.HasThresholdNotification(ctx, cert.ID, threshold)
if err != nil {
fmt.Printf("failed to check notification dedup for cert %s threshold %d: %v\n",
cert.ID, threshold, err)
continue
}
if alreadySent {
continue
}
// Send the threshold alert
if err := s.notificationSvc.SendThresholdAlert(ctx, cert, daysUntil, threshold); err != nil {
fmt.Printf("failed to send threshold alert for cert %s at %d days: %v\n",
cert.ID, threshold, err)
}
// Record audit event for the alert
_ = s.auditService.RecordEvent(ctx, "system", domain.ActorTypeSystem,
"expiration_alert_sent", "certificate", cert.ID,
map[string]interface{}{
"threshold_days": threshold,
"days_until_expiry": daysUntil,
})
}
}
// updateCertExpiryStatus transitions a certificate to Expiring or Expired status based on
// how many days remain before expiry. Expired = 0 or fewer days, Expiring = within 30 days.
func (s *RenewalService) updateCertExpiryStatus(ctx context.Context, cert *domain.ManagedCertificate, daysUntil float64) {
var newStatus domain.CertificateStatus
if daysUntil <= 0 {
newStatus = domain.CertificateStatusExpired
} else {
newStatus = domain.CertificateStatusExpiring
}
// Only update if status is changing and cert isn't already in a terminal/active renewal state
if cert.Status == newStatus {
return
}
if cert.Status == domain.CertificateStatusRenewalInProgress ||
cert.Status == domain.CertificateStatusArchived ||
cert.Status == domain.CertificateStatusRevoked {
return
}
cert.Status = newStatus
cert.UpdatedAt = time.Now()
if err := s.certRepo.Update(ctx, cert); err != nil {
fmt.Printf("failed to update cert %s status to %s: %v\n", cert.ID, newStatus, err)
}
}
// ProcessRenewalJob executes a renewal job: generate CSR, call issuer, store new version,
// update cert status, and create deployment jobs for targets.
//