mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-07 17:22:07 +00:00
fix: end-to-end certificate lifecycle bugs + integration test environment
Fixes 12 production bugs preventing the full issuance→deployment flow from working with ACME (Pebble/Let's Encrypt) and step-ca issuers: ACME connector (acme.go): - Save orderURI before WaitOrder overwrites it (Go crypto/acme bug) - Add CreateOrderCert fallback via WaitOrder+FetchCert - Remove defer-reset in ValidateConfig that caused nil pointer panic - Add Insecure TLS option for self-signed ACME servers (Pebble) step-ca connector (stepca.go, jwe.go): - Real JWE provisioner key loading + decryption (was using ephemeral keys) - Fix JWT audience (/1.0/sign), sha claim (key fingerprint), kid header - Custom root CA trust via RootCertPath config - Remove hardcoded 90-day validity default (let step-ca decide) NGINX target connector (nginx.go): - Use sh -c for validate/reload commands (shell interpretation) - Use filepath.Dir instead of fragile string slicing - Add private key file writing (agent-mode keys were never deployed) - Make chain_path write conditional Server/service layer: - TriggerRenewalWithActor now creates actual Job records (was no-op) - createDeploymentJobs falls back to DB query when cert.TargetIDs empty - ProcessPendingJobs skips agent-routed deployment jobs - Agent cert pickup path parsing: len(parts)<4 → len(parts)<3 - Health/ready/auth-info endpoints bypass auth middleware - Write timeout 15s→120s for ACME issuance - Cert fingerprint computed on CSR submission Integration test environment (deploy/test/): - 10-phase test script covering Local CA, ACME, step-ca, revocation, discovery, renewal, and API spot checks - Docker Compose with 7 containers (server, agent, postgres, nginx, pebble, challtestsrv, step-ca) on isolated network - TLS verification checks SAN (not just Subject CN) for modern CA compat Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -178,14 +178,15 @@ func (s *AgentService) SubmitCSR(ctx context.Context, agentID string, certID str
|
||||
}
|
||||
|
||||
version := &domain.CertificateVersion{
|
||||
ID: generateID("certver"),
|
||||
CertificateID: certID,
|
||||
SerialNumber: result.Serial,
|
||||
NotBefore: result.NotBefore,
|
||||
NotAfter: result.NotAfter,
|
||||
PEMChain: result.CertPEM + "\n" + result.ChainPEM,
|
||||
CSRPEM: string(csrPEM),
|
||||
CreatedAt: time.Now(),
|
||||
ID: generateID("certver"),
|
||||
CertificateID: certID,
|
||||
SerialNumber: result.Serial,
|
||||
NotBefore: result.NotBefore,
|
||||
NotAfter: result.NotAfter,
|
||||
FingerprintSHA256: computeCertFingerprint(result.CertPEM),
|
||||
PEMChain: result.CertPEM + "\n" + result.ChainPEM,
|
||||
CSRPEM: string(csrPEM),
|
||||
CreatedAt: time.Now(),
|
||||
}
|
||||
|
||||
if err := s.certRepo.CreateVersion(ctx, version); err != nil {
|
||||
|
||||
@@ -14,10 +14,12 @@ import (
|
||||
type CertificateService struct {
|
||||
certRepo repository.CertificateRepository
|
||||
targetRepo repository.TargetRepository
|
||||
jobRepo repository.JobRepository
|
||||
policyService *PolicyService
|
||||
auditService *AuditService
|
||||
revSvc *RevocationSvc
|
||||
caSvc *CAOperationsSvc
|
||||
keygenMode string
|
||||
}
|
||||
|
||||
// NewCertificateService creates a new certificate service.
|
||||
@@ -48,6 +50,16 @@ func (s *CertificateService) SetTargetRepo(repo repository.TargetRepository) {
|
||||
s.targetRepo = repo
|
||||
}
|
||||
|
||||
// SetJobRepo sets the job repository for creating renewal/issuance jobs.
|
||||
func (s *CertificateService) SetJobRepo(repo repository.JobRepository) {
|
||||
s.jobRepo = repo
|
||||
}
|
||||
|
||||
// SetKeygenMode sets the key generation mode (agent or server).
|
||||
func (s *CertificateService) SetKeygenMode(mode string) {
|
||||
s.keygenMode = mode
|
||||
}
|
||||
|
||||
// List returns a paginated list of certificates matching the filter.
|
||||
func (s *CertificateService) List(ctx context.Context, filter *repository.CertificateFilter) ([]*domain.ManagedCertificate, int, error) {
|
||||
certs, total, err := s.certRepo.List(ctx, filter)
|
||||
@@ -195,6 +207,8 @@ func (s *CertificateService) GetVersions(ctx context.Context, certID string) ([]
|
||||
}
|
||||
|
||||
// TriggerRenewalWithActor initiates a renewal job if the certificate is eligible.
|
||||
// Creates a Renewal job (or Issuance for new certs) so the scheduler's job processor
|
||||
// can pick it up and route it through the issuer connector.
|
||||
func (s *CertificateService) TriggerRenewalWithActor(ctx context.Context, certID string, actor string) error {
|
||||
cert, err := s.certRepo.Get(ctx, certID)
|
||||
if err != nil {
|
||||
@@ -220,6 +234,45 @@ func (s *CertificateService) TriggerRenewalWithActor(ctx context.Context, certID
|
||||
return fmt.Errorf("failed to update certificate status: %w", err)
|
||||
}
|
||||
|
||||
// Create a renewal job so the job processor can pick it up.
|
||||
// In agent keygen mode, the job starts as AwaitingCSR so the agent
|
||||
// generates the key pair and submits a CSR. In server mode, it starts as Pending.
|
||||
if s.jobRepo != nil {
|
||||
jobStatus := domain.JobStatusPending
|
||||
if s.keygenMode == "agent" {
|
||||
jobStatus = domain.JobStatusAwaitingCSR
|
||||
}
|
||||
|
||||
// Determine job type: Issuance for certs that have never been issued,
|
||||
// Renewal for certs that already have a version.
|
||||
jobType := domain.JobTypeRenewal
|
||||
if cert.ExpiresAt.IsZero() || cert.ExpiresAt.Year() < 2000 {
|
||||
jobType = domain.JobTypeIssuance
|
||||
}
|
||||
|
||||
job := &domain.Job{
|
||||
ID: generateID("job"),
|
||||
CertificateID: cert.ID,
|
||||
Type: jobType,
|
||||
Status: jobStatus,
|
||||
MaxAttempts: 3,
|
||||
ScheduledAt: time.Now(),
|
||||
CreatedAt: time.Now(),
|
||||
}
|
||||
|
||||
if err := s.jobRepo.Create(ctx, job); err != nil {
|
||||
slog.Error("failed to create renewal job", "cert_id", cert.ID, "error", err)
|
||||
return fmt.Errorf("failed to create renewal job: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("created renewal job via API trigger",
|
||||
"job_id", job.ID,
|
||||
"cert_id", cert.ID,
|
||||
"job_type", string(jobType),
|
||||
"job_status", string(jobStatus),
|
||||
"keygen_mode", s.keygenMode)
|
||||
}
|
||||
|
||||
// Record audit event
|
||||
if err := s.auditService.RecordEvent(ctx, actor, domain.ActorTypeUser,
|
||||
"renewal_triggered", "certificate", certID,
|
||||
|
||||
@@ -54,6 +54,16 @@ func (s *JobService) ProcessPendingJobs(ctx context.Context) error {
|
||||
|
||||
// Process each job
|
||||
for _, job := range pendingJobs {
|
||||
// Skip deployment jobs that have an agent_id — those are meant for agent
|
||||
// pickup via GetPendingWork(), not server-side processing. The server should
|
||||
// only process deployment jobs without an agent (legacy/serverless targets).
|
||||
if job.Type == domain.JobTypeDeployment && job.AgentID != nil && *job.AgentID != "" {
|
||||
s.logger.Debug("skipping agent-routed deployment job",
|
||||
"job_id", job.ID,
|
||||
"agent_id", *job.AgentID)
|
||||
continue
|
||||
}
|
||||
|
||||
if err := s.processJob(ctx, job); err != nil {
|
||||
s.logger.Error("failed to process job",
|
||||
"job_id", job.ID,
|
||||
|
||||
+42
-13
@@ -636,23 +636,50 @@ func (s *RenewalService) CompleteAgentCSRRenewal(ctx context.Context, job *domai
|
||||
}
|
||||
|
||||
// createDeploymentJobs creates pending deployment jobs for each target associated with a cert.
|
||||
// If cert.TargetIDs is empty (common — the repository doesn't populate this field),
|
||||
// falls back to querying certificate_target_mappings via targetRepo.ListByCertificate.
|
||||
func (s *RenewalService) createDeploymentJobs(ctx context.Context, cert *domain.ManagedCertificate) {
|
||||
if len(cert.TargetIDs) == 0 {
|
||||
// Resolve targets: prefer in-memory TargetIDs, fall back to DB query
|
||||
type targetInfo struct {
|
||||
id string
|
||||
agentID string
|
||||
}
|
||||
var targets []targetInfo
|
||||
|
||||
if len(cert.TargetIDs) > 0 {
|
||||
// TargetIDs populated (e.g. from test or manual wiring)
|
||||
for _, tid := range cert.TargetIDs {
|
||||
ti := targetInfo{id: tid}
|
||||
if s.targetRepo != nil {
|
||||
if target, err := s.targetRepo.Get(ctx, tid); err == nil && target.AgentID != "" {
|
||||
ti.agentID = target.AgentID
|
||||
}
|
||||
}
|
||||
targets = append(targets, ti)
|
||||
}
|
||||
} else if s.targetRepo != nil {
|
||||
// TargetIDs empty — query certificate_target_mappings via repository
|
||||
dbTargets, err := s.targetRepo.ListByCertificate(ctx, cert.ID)
|
||||
if err != nil {
|
||||
slog.Error("failed to query targets for certificate", "cert_id", cert.ID, "error", err)
|
||||
return
|
||||
}
|
||||
for _, t := range dbTargets {
|
||||
targets = append(targets, targetInfo{id: t.ID, agentID: t.AgentID})
|
||||
}
|
||||
}
|
||||
|
||||
if len(targets) == 0 {
|
||||
slog.Debug("no targets found for certificate, skipping deployment", "cert_id", cert.ID)
|
||||
return
|
||||
}
|
||||
for _, targetID := range cert.TargetIDs {
|
||||
tid := targetID
|
||||
|
||||
// Resolve agent_id from target for job routing
|
||||
for _, t := range targets {
|
||||
tid := t.id
|
||||
var agentIDPtr *string
|
||||
if s.targetRepo != nil {
|
||||
target, err := s.targetRepo.Get(ctx, tid)
|
||||
if err != nil {
|
||||
slog.Warn("failed to resolve agent for deployment job", "target_id", tid, "error", err)
|
||||
} else if target.AgentID != "" {
|
||||
agentID := target.AgentID
|
||||
agentIDPtr = &agentID
|
||||
}
|
||||
if t.agentID != "" {
|
||||
aid := t.agentID
|
||||
agentIDPtr = &aid
|
||||
}
|
||||
|
||||
deployJob := &domain.Job{
|
||||
@@ -667,7 +694,9 @@ func (s *RenewalService) createDeploymentJobs(ctx context.Context, cert *domain.
|
||||
CreatedAt: time.Now(),
|
||||
}
|
||||
if err := s.jobRepo.Create(ctx, deployJob); err != nil {
|
||||
slog.Error("failed to create deployment job for target", "target_id", targetID, "error", err)
|
||||
slog.Error("failed to create deployment job for target", "target_id", tid, "cert_id", cert.ID, "error", err)
|
||||
} else {
|
||||
slog.Info("created deployment job", "job_id", deployJob.ID, "cert_id", cert.ID, "target_id", tid, "agent_id", t.agentID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user