mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-07 21:21:40 +00:00
8b75e0311b
Mechanical sed across the main go.mod's module declaration, the f5-mock-icontrol
sub-module's go.mod, every Go file's import path (361 files), and a rebuild of
the checked-in f5-mock-icontrol binary so its embedded build-info reflects the
new module path. No behavior change.
Choice B from cowork/transfer-certctl-to-org.md, executed 2026-05-04. Choice A
(keep module path declared as github.com/shankar0123/certctl regardless of
repo URL) shipped on the day of the org transfer (2026-05-03) since we had no
external Go consumers; this commit closes that deferral.
Backward-compat: GitHub HTTP redirects continue to forward
github.com/shankar0123/certctl → github.com/certctl-io/certctl at the URL
level, but Go's module proxy uses the path declared in go.mod as the
canonical name. Pre-fix, anyone trying `go get github.com/certctl-io/certctl/...`
hit a "module path mismatch" error because go.mod said
github.com/shankar0123/certctl and the URL they fetched it from said
certctl-io/certctl. Post-fix, the canonical name and the URL agree, so
go get / go install / external Go consumers / Go-tooling integrations
work cleanly via either the new path (preferred) or the old path (which
redirects and Go follows the redirect for source fetch).
Anyone still importing the old path in their own code migrates by updating
their go.mod `require` line and their import statements to the new path —
the module path declared in go.mod is the authoritative import name, so a
mass sed across their import statements is the whole migration on the
consumer side. No external consumers exist today.
Diff shape:
361 *.go files — import path replacement only
2 go.mod — module declaration replacement only
1 binary — deploy/test/f5-mock-icontrol/f5-mock-icontrol rebuilt
so embedded build-info reflects the new path (8618965 vs
8618933 bytes; 32-byte diff is the build-info change)
Total: 364 files, 730 insertions / 730 deletions, net-zero size, pure
mechanical substitution.
Verification:
gofmt: 17 files needed re-alignment after sed (the new path is one char
shorter than the old, so column-aligned import groups drifted). Applied
`gofmt -w` to fix.
go mod tidy: clean exit on both modules.
go vet ./...: clean exit.
go build ./...: clean exit.
go test -short -count=1 on representative packages: all green
(internal/domain, internal/validation, internal/crypto, internal/crypto/signer,
cmd/agent). Test output now reads `ok github.com/certctl-io/certctl/...`
confirming the module path resolves correctly.
binary: f5-mock-icontrol rebuilt; `strings | grep shankar0123` returns
nothing; `strings | grep certctl-io/certctl` shows the new module path
embedded in build-info.
Files intentionally NOT touched in this commit:
README.md / CHANGELOG.md / docs/ / etc. — already swept to certctl-io
URLs in commit 0729ee4 (the post-transfer URL refresh). This commit is
purely the Go-tooling layer.
Scarf pixels (`shankar0123.docker.scarf.sh/...`) — Scarf-account
namespace, not a Go import or GitHub repo URL. Stays.
This is a non-blocking, non-customer-impacting change. Operators pulling
container images, running `make verify`, hitting the API, or installing the
agent see no functional difference. Only Go-tooling consumers (none today)
are affected, and they're enabled — not broken — by this commit.
337 lines
12 KiB
Go
337 lines
12 KiB
Go
package service
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"time"
|
|
|
|
"github.com/certctl-io/certctl/internal/domain"
|
|
"github.com/certctl-io/certctl/internal/repository"
|
|
)
|
|
|
|
// DeploymentService manages certificate deployment to targets via agents.
|
|
type DeploymentService struct {
|
|
jobRepo repository.JobRepository
|
|
targetRepo repository.TargetRepository
|
|
agentRepo repository.AgentRepository
|
|
certRepo repository.CertificateRepository
|
|
auditService *AuditService
|
|
notificationSvc *NotificationService
|
|
}
|
|
|
|
// NewDeploymentService creates a new deployment service.
|
|
func NewDeploymentService(
|
|
jobRepo repository.JobRepository,
|
|
targetRepo repository.TargetRepository,
|
|
agentRepo repository.AgentRepository,
|
|
certRepo repository.CertificateRepository,
|
|
auditService *AuditService,
|
|
notificationSvc *NotificationService,
|
|
) *DeploymentService {
|
|
return &DeploymentService{
|
|
jobRepo: jobRepo,
|
|
targetRepo: targetRepo,
|
|
agentRepo: agentRepo,
|
|
certRepo: certRepo,
|
|
auditService: auditService,
|
|
notificationSvc: notificationSvc,
|
|
}
|
|
}
|
|
|
|
// CreateDeploymentJobs creates a job for each target of a certificate.
|
|
func (s *DeploymentService) CreateDeploymentJobs(ctx context.Context, certID string) ([]string, error) {
|
|
// Fetch all targets for this certificate
|
|
targets, err := s.targetRepo.ListByCertificate(ctx, certID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to list targets: %w", err)
|
|
}
|
|
|
|
if len(targets) == 0 {
|
|
return nil, fmt.Errorf("no targets found for certificate %s", certID)
|
|
}
|
|
|
|
var jobIDs []string
|
|
|
|
// Create a deployment job for each target
|
|
for _, target := range targets {
|
|
job := &domain.Job{
|
|
ID: generateID("job"),
|
|
CertificateID: certID,
|
|
Type: domain.JobTypeDeployment,
|
|
Status: domain.JobStatusPending,
|
|
ScheduledAt: time.Now(),
|
|
CreatedAt: time.Now(),
|
|
}
|
|
// Store target info in TargetID field
|
|
if target.ID != "" {
|
|
job.TargetID = &target.ID
|
|
}
|
|
// Route job to the target's assigned agent
|
|
if target.AgentID != "" {
|
|
agentID := target.AgentID
|
|
job.AgentID = &agentID
|
|
}
|
|
|
|
if err := s.jobRepo.Create(ctx, job); err != nil {
|
|
slog.Error("failed to create deployment job for target", "target_id", target.ID, "error", err)
|
|
continue
|
|
}
|
|
|
|
jobIDs = append(jobIDs, job.ID)
|
|
}
|
|
|
|
if len(jobIDs) == 0 {
|
|
return nil, fmt.Errorf("failed to create any deployment jobs")
|
|
}
|
|
|
|
// Record audit event
|
|
if auditErr := s.auditService.RecordEvent(ctx, "system", domain.ActorTypeSystem,
|
|
"deployment_jobs_created", "certificate", certID,
|
|
map[string]interface{}{"target_count": len(targets), "job_count": len(jobIDs)}); auditErr != nil {
|
|
slog.Error("failed to record audit event", "error", auditErr)
|
|
}
|
|
|
|
return jobIDs, nil
|
|
}
|
|
|
|
// ProcessDeploymentJob handles a deployment job by coordinating with an agent.
|
|
func (s *DeploymentService) ProcessDeploymentJob(ctx context.Context, job *domain.Job) error {
|
|
// Update job status to in-progress
|
|
if err := s.jobRepo.UpdateStatus(ctx, job.ID, domain.JobStatusRunning, ""); err != nil {
|
|
return fmt.Errorf("failed to update job status: %w", err)
|
|
}
|
|
|
|
// Fetch certificate
|
|
cert, err := s.certRepo.Get(ctx, job.CertificateID)
|
|
if err != nil {
|
|
updateErr := s.jobRepo.UpdateStatus(ctx, job.ID, domain.JobStatusFailed, fmt.Sprintf("certificate fetch failed: %v", err))
|
|
if updateErr != nil {
|
|
slog.Error("failed to update job status", "job_id", job.ID, "error", updateErr)
|
|
}
|
|
return fmt.Errorf("failed to fetch certificate: %w", err)
|
|
}
|
|
|
|
// Fetch target
|
|
var targetID string
|
|
if job.TargetID != nil {
|
|
targetID = *job.TargetID
|
|
}
|
|
if targetID == "" {
|
|
updateErr := s.jobRepo.UpdateStatus(ctx, job.ID, domain.JobStatusFailed, "target_id not found in job")
|
|
if updateErr != nil {
|
|
slog.Error("failed to update job status", "job_id", job.ID, "error", updateErr)
|
|
}
|
|
return fmt.Errorf("target_id not found in job")
|
|
}
|
|
|
|
target, err := s.targetRepo.Get(ctx, targetID)
|
|
if err != nil {
|
|
updateErr := s.jobRepo.UpdateStatus(ctx, job.ID, domain.JobStatusFailed, fmt.Sprintf("target fetch failed: %v", err))
|
|
if updateErr != nil {
|
|
slog.Error("failed to update job status", "job_id", job.ID, "error", updateErr)
|
|
}
|
|
return fmt.Errorf("failed to fetch target: %w", err)
|
|
}
|
|
|
|
// Verify agent is available
|
|
agentID := target.AgentID
|
|
agent, err := s.agentRepo.Get(ctx, agentID)
|
|
if err != nil {
|
|
updateErr := s.jobRepo.UpdateStatus(ctx, job.ID, domain.JobStatusFailed, fmt.Sprintf("agent fetch failed: %v", err))
|
|
if updateErr != nil {
|
|
slog.Error("failed to update job status", "job_id", job.ID, "error", updateErr)
|
|
}
|
|
return fmt.Errorf("failed to fetch agent: %w", err)
|
|
}
|
|
|
|
// I-004: AgentRepository.Get surfaces retired rows by design (for the GUI
|
|
// banner + 410 Gone heartbeat path). Deployments must never dispatch to a
|
|
// retired agent — it will never heartbeat again and the target row should
|
|
// itself have been cascade-retired when the agent was force-retired. A job
|
|
// slipping through here would otherwise hit the heartbeat-staleness branch
|
|
// below with the misleading reason "agent is offline"; we want operators to
|
|
// see the real cause. Fail the job with an explicit reason, send a
|
|
// deployment notification so the owner is alerted, and record an audit
|
|
// event. Falls through the same notify+audit shape as the offline branch.
|
|
if agent.IsRetired() {
|
|
updateErr := s.jobRepo.UpdateStatus(ctx, job.ID, domain.JobStatusFailed, "assigned agent is retired")
|
|
if updateErr != nil {
|
|
slog.Error("failed to update job status", "job_id", job.ID, "error", updateErr)
|
|
}
|
|
if notifErr := s.notificationSvc.SendDeploymentNotification(ctx, cert, target, false, fmt.Errorf("agent retired")); notifErr != nil {
|
|
slog.Error("failed to send deployment notification", "error", notifErr)
|
|
}
|
|
if auditErr := s.auditService.RecordEvent(ctx, "system", domain.ActorTypeSystem,
|
|
"deployment_job_failed", "certificate", job.CertificateID,
|
|
map[string]interface{}{"job_id": job.ID, "reason": "agent retired", "target_id": targetID, "agent_id": agentID}); auditErr != nil {
|
|
slog.Error("failed to record audit event", "error", auditErr)
|
|
}
|
|
return fmt.Errorf("agent %s is retired", agentID)
|
|
}
|
|
|
|
// Check agent heartbeat (must be within last 5 minutes)
|
|
if agent.LastHeartbeatAt != nil && time.Since(*agent.LastHeartbeatAt) > 5*time.Minute {
|
|
updateErr := s.jobRepo.UpdateStatus(ctx, job.ID, domain.JobStatusFailed, "agent is offline")
|
|
if updateErr != nil {
|
|
slog.Error("failed to update job status", "job_id", job.ID, "error", updateErr)
|
|
}
|
|
|
|
if notifErr := s.notificationSvc.SendDeploymentNotification(ctx, cert, target, false, fmt.Errorf("agent offline")); notifErr != nil {
|
|
slog.Error("failed to send deployment notification", "error", notifErr)
|
|
}
|
|
if auditErr := s.auditService.RecordEvent(ctx, "system", domain.ActorTypeSystem,
|
|
"deployment_job_failed", "certificate", job.CertificateID,
|
|
map[string]interface{}{"job_id": job.ID, "reason": "agent offline", "target_id": targetID}); auditErr != nil {
|
|
slog.Error("failed to record audit event", "error", auditErr)
|
|
}
|
|
|
|
return fmt.Errorf("agent %s is offline", agentID)
|
|
}
|
|
|
|
// In a real implementation, the agent would poll GetPendingWork() to fetch this job.
|
|
// The control plane would wait for the agent to complete the work asynchronously.
|
|
// For now, we mark it as pending and rely on agent polling.
|
|
|
|
// Record audit event
|
|
if auditErr := s.auditService.RecordEvent(ctx, "system", domain.ActorTypeSystem,
|
|
"deployment_job_dispatched", "certificate", job.CertificateID,
|
|
map[string]interface{}{"job_id": job.ID, "target_id": targetID, "agent_id": agentID}); auditErr != nil {
|
|
slog.Error("failed to record audit event", "error", auditErr)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// ValidateDeployment checks the deployment status of a certificate on a target.
|
|
func (s *DeploymentService) ValidateDeployment(ctx context.Context, certID string, targetID string) (bool, error) {
|
|
// List deployment jobs for this certificate and target
|
|
jobs, err := s.jobRepo.ListByCertificate(ctx, certID)
|
|
if err != nil {
|
|
return false, fmt.Errorf("failed to list jobs: %w", err)
|
|
}
|
|
|
|
for _, job := range jobs {
|
|
if job.Type != domain.JobTypeDeployment {
|
|
continue
|
|
}
|
|
|
|
// Check if this job is for the target
|
|
if job.TargetID == nil || *job.TargetID != targetID {
|
|
continue
|
|
}
|
|
|
|
// Check if the most recent job for this target succeeded
|
|
if job.Status == domain.JobStatusCompleted {
|
|
return true, nil
|
|
}
|
|
|
|
if job.Status == domain.JobStatusFailed {
|
|
if job.LastError != nil {
|
|
return false, fmt.Errorf("deployment failed: %s", *job.LastError)
|
|
}
|
|
return false, fmt.Errorf("deployment failed")
|
|
}
|
|
|
|
// Still in progress
|
|
return false, fmt.Errorf("deployment in progress")
|
|
}
|
|
|
|
// No deployment job found
|
|
return false, fmt.Errorf("no deployment job found for target %s", targetID)
|
|
}
|
|
|
|
// MarkDeploymentComplete marks a deployment job as successfully completed.
|
|
// This is called by agents after they finish deploying a certificate.
|
|
func (s *DeploymentService) MarkDeploymentComplete(ctx context.Context, jobID string) error {
|
|
job, err := s.jobRepo.Get(ctx, jobID)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to fetch job: %w", err)
|
|
}
|
|
|
|
if err := s.jobRepo.UpdateStatus(ctx, jobID, domain.JobStatusCompleted, ""); err != nil {
|
|
return fmt.Errorf("failed to update job status: %w", err)
|
|
}
|
|
|
|
// Fetch certificate and target for notification
|
|
cert, err := s.certRepo.Get(ctx, job.CertificateID)
|
|
if err != nil {
|
|
slog.Error("failed to fetch certificate for notification", "error", err)
|
|
return nil
|
|
}
|
|
|
|
var targetID string
|
|
if job.TargetID != nil {
|
|
targetID = *job.TargetID
|
|
}
|
|
|
|
if targetID != "" {
|
|
target, err := s.targetRepo.Get(ctx, targetID)
|
|
if err != nil {
|
|
slog.Error("failed to fetch target for notification", "error", err)
|
|
return nil
|
|
}
|
|
|
|
// Send deployment success notification
|
|
if err := s.notificationSvc.SendDeploymentNotification(ctx, cert, target, true, nil); err != nil {
|
|
slog.Error("failed to send deployment notification", "error", err)
|
|
}
|
|
}
|
|
|
|
// Record audit event
|
|
if auditErr := s.auditService.RecordEvent(ctx, "system", domain.ActorTypeSystem,
|
|
"deployment_job_completed", "certificate", job.CertificateID,
|
|
map[string]interface{}{"job_id": jobID, "target_id": targetID}); auditErr != nil {
|
|
slog.Error("failed to record audit event", "error", auditErr)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// MarkDeploymentFailed marks a deployment job as failed.
|
|
// Called by agents when deployment fails.
|
|
func (s *DeploymentService) MarkDeploymentFailed(ctx context.Context, jobID string, errMsg string) error {
|
|
job, err := s.jobRepo.Get(ctx, jobID)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to fetch job: %w", err)
|
|
}
|
|
|
|
if err := s.jobRepo.UpdateStatus(ctx, jobID, domain.JobStatusFailed, errMsg); err != nil {
|
|
return fmt.Errorf("failed to update job status: %w", err)
|
|
}
|
|
|
|
// Fetch certificate and target for notification
|
|
cert, err := s.certRepo.Get(ctx, job.CertificateID)
|
|
if err != nil {
|
|
slog.Error("failed to fetch certificate for notification", "error", err)
|
|
return nil
|
|
}
|
|
|
|
var targetID string
|
|
if job.TargetID != nil {
|
|
targetID = *job.TargetID
|
|
}
|
|
|
|
if targetID != "" {
|
|
target, err := s.targetRepo.Get(ctx, targetID)
|
|
if err != nil {
|
|
slog.Error("failed to fetch target for notification", "error", err)
|
|
return nil
|
|
}
|
|
|
|
// Send deployment failure notification
|
|
if err := s.notificationSvc.SendDeploymentNotification(ctx, cert, target, false, fmt.Errorf("%s", errMsg)); err != nil {
|
|
slog.Error("failed to send deployment notification", "error", err)
|
|
}
|
|
}
|
|
|
|
// Record audit event
|
|
if auditErr := s.auditService.RecordEvent(ctx, "system", domain.ActorTypeSystem,
|
|
"deployment_job_failed", "certificate", job.CertificateID,
|
|
map[string]interface{}{"job_id": jobID, "target_id": targetID, "error": errMsg}); auditErr != nil {
|
|
slog.Error("failed to record audit event", "error", auditErr)
|
|
}
|
|
|
|
return nil
|
|
}
|