Files
certctl/internal/service/job_offline_agent_reaper_test.go
T
shankar0123 8b75e0311b chore: rename Go module path to github.com/certctl-io/certctl
Mechanical sed across the main go.mod's module declaration, the f5-mock-icontrol
sub-module's go.mod, every Go file's import path (361 files), and a rebuild of
the checked-in f5-mock-icontrol binary so its embedded build-info reflects the
new module path. No behavior change.

Choice B from cowork/transfer-certctl-to-org.md, executed 2026-05-04. Choice A
(keep module path declared as github.com/shankar0123/certctl regardless of
repo URL) shipped on the day of the org transfer (2026-05-03) since we had no
external Go consumers; this commit closes that deferral.

Backward-compat: GitHub HTTP redirects continue to forward
github.com/shankar0123/certctl → github.com/certctl-io/certctl at the URL
level, but Go's module proxy uses the path declared in go.mod as the
canonical name. Pre-fix, anyone trying `go get github.com/certctl-io/certctl/...`
hit a "module path mismatch" error because go.mod said
github.com/shankar0123/certctl and the URL they fetched it from said
certctl-io/certctl. Post-fix, the canonical name and the URL agree, so
go get / go install / external Go consumers / Go-tooling integrations
work cleanly via either the new path (preferred) or the old path (which
redirects and Go follows the redirect for source fetch).

Anyone still importing the old path inside their own code must update the
`require` line in their go.mod to the new path and rewrite their import
statements to match — the module path declared in go.mod is the authoritative
import name, so a mass sed across their import statements is the entire
migration on the consumer side. No external consumers exist today.

Diff shape:
  361 *.go files  — import path replacement only
    2 go.mod     — module declaration replacement only
    1 binary     — deploy/test/f5-mock-icontrol/f5-mock-icontrol rebuilt
                   so embedded build-info reflects the new path (8618965 vs
                   8618933 bytes; 32-byte diff is the build-info change)

  Total: 364 files, 730 insertions / 730 deletions, net-zero size, pure
  mechanical substitution.

Verification:
  gofmt: 17 files needed re-alignment after sed (the new path is one char
    shorter than the old, so column-aligned import groups drifted). Applied
    `gofmt -w` to fix.
  go mod tidy: clean exit on both modules.
  go vet ./...: clean exit.
  go build ./...: clean exit.
  go test -short -count=1 on representative packages: all green
    (internal/domain, internal/validation, internal/crypto, internal/crypto/signer,
    cmd/agent). Test output now reads `ok github.com/certctl-io/certctl/...`
    confirming the module path resolves correctly.
  binary: f5-mock-icontrol rebuilt; `strings | grep shankar0123` returns
    nothing; `strings | grep certctl-io/certctl` shows the new module path
    embedded in build-info.

Files intentionally NOT touched in this commit:
  README.md / CHANGELOG.md / docs/ / etc. — already swept to certctl-io
    URLs in commit 0729ee4 (the post-transfer URL refresh). This commit is
    purely the Go-tooling layer.
  Scarf pixels (`shankar0123.docker.scarf.sh/...`) — Scarf-account
    namespace, not a Go import or GitHub repo URL. Stays.

This is a non-blocking, non-customer-impacting change. Operators pulling
container images, running `make verify`, hitting the API, or installing the
agent see no functional difference. Only Go-tooling consumers (none today)
are affected, and they're enabled — not broken — by this commit.
2026-05-04 00:30:29 +00:00

170 lines
5.7 KiB
Go

package service
import (
"context"
"errors"
"io"
"log/slog"
"strings"
"testing"
"time"
"github.com/certctl-io/certctl/internal/domain"
)
// Bundle C / Audit M-016 (CWE-754): regression suite for the new
// ReapJobsWithOfflineAgents path. Pre-bundle the reaper only handled
// AwaitingCSR / AwaitingApproval timeouts; jobs claimed by an agent
// that subsequently dies sat in Running indefinitely. These tests pin
// the new behavior end-to-end through the JobService → mockJobRepo
// boundary.
//
// newOfflineReaperService builds a JobService backed by an empty in-memory
// job repo mock and a fresh audit repo mock, and returns all three so a test
// can seed the repo and inspect recorded audit events. The remaining
// constructor dependencies are passed as nil, and log output is discarded.
func newOfflineReaperService(t *testing.T) (*JobService, *mockJobRepo, *mockAuditRepo) {
	t.Helper()

	// Route logs to io.Discard so the tests stay quiet.
	quietLogger := slog.New(slog.NewTextHandler(io.Discard, nil))

	jobs := &mockJobRepo{
		Jobs:   make(map[string]*domain.Job),
		Agents: make(map[string]*domain.Agent),
	}
	audits := newMockAuditRepository()

	service := NewJobService(jobs, nil, nil, nil, nil, quietLogger)
	service.SetAuditService(NewAuditService(audits))
	return service, jobs, audits
}
// mkRunningJob returns a Job with the given id in the Running state, assigned
// to agentID, whose CreatedAt lies two hours before the current time.
func mkRunningJob(id, agentID string) *domain.Job {
	job := &domain.Job{
		ID:        id,
		Status:    domain.JobStatusRunning,
		CreatedAt: time.Now().Add(-2 * time.Hour),
	}
	// AgentID is a pointer field; the parameter is a per-call copy, so its
	// address is safe to hand out.
	job.AgentID = &agentID
	return job
}
// mkAgentWithHeartbeat returns an Agent whose ID and Name are both id and
// whose LastHeartbeatAt is hbAge before the current time.
func mkAgentWithHeartbeat(id string, hbAge time.Duration) *domain.Agent {
	agent := &domain.Agent{ID: id, Name: id}
	lastSeen := time.Now().Add(-hbAge)
	agent.LastHeartbeatAt = &lastSeen
	return agent
}
// TestReapJobsWithOfflineAgents_FlipsRunningToFailed seeds two Running jobs —
// one owned by an agent whose last heartbeat is 30 minutes old, one by an
// agent whose heartbeat is 1 minute old — and runs the reaper with a
// 10-minute TTL. It expects only the stale agent's job to become Failed, with
// a LastError that mentions "agent offline".
func TestReapJobsWithOfflineAgents_FlipsRunningToFailed(t *testing.T) {
	svc, repo, _ := newOfflineReaperService(t)

	repo.Agents["agt-stale"] = mkAgentWithHeartbeat("agt-stale", 30*time.Minute)
	repo.Agents["agt-fresh"] = mkAgentWithHeartbeat("agt-fresh", 1*time.Minute)
	repo.Jobs["j-stale"] = mkRunningJob("j-stale", "agt-stale")
	repo.Jobs["j-fresh"] = mkRunningJob("j-fresh", "agt-fresh")

	if err := svc.ReapJobsWithOfflineAgents(context.Background(), 10*time.Minute); err != nil {
		t.Fatalf("reaper returned error: %v", err)
	}

	if got := repo.Jobs["j-stale"].Status; got != domain.JobStatusFailed {
		t.Errorf("stale-agent job status = %s, want Failed", got)
	}
	if got := repo.Jobs["j-fresh"].Status; got != domain.JobStatusRunning {
		t.Errorf("fresh-agent job status = %s, want Running (must NOT be reaped)", got)
	}

	// LastError is a *string: formatting the pointer itself with %v would
	// print an address, so dereference it to show the actual message.
	if lastErr := repo.Jobs["j-stale"].LastError; lastErr == nil {
		t.Error("stale job LastError must cite agent offline; got: <nil>")
	} else if !strings.Contains(*lastErr, "agent offline") {
		t.Errorf("stale job LastError must cite agent offline; got: %q", *lastErr)
	}
}
// TestReapJobsWithOfflineAgents_SkipsServerKeygenJobs checks that a Running
// job with no AgentID (server-side keygen) is left untouched: with no agent
// attached there is nothing that could be "offline".
func TestReapJobsWithOfflineAgents_SkipsServerKeygenJobs(t *testing.T) {
	const jobID = "j-server"

	svc, repo, _ := newOfflineReaperService(t)
	repo.Jobs[jobID] = &domain.Job{
		ID:        jobID,
		Status:    domain.JobStatusRunning,
		CreatedAt: time.Now().Add(-time.Hour),
	}

	err := svc.ReapJobsWithOfflineAgents(context.Background(), 1*time.Minute)
	if err != nil {
		t.Fatalf("reaper returned error: %v", err)
	}

	if status := repo.Jobs[jobID].Status; status != domain.JobStatusRunning {
		t.Errorf("server-keygen job (no agent_id) status = %s, want Running", status)
	}
}
// TestReapJobsWithOfflineAgents_SkipsNonRunningJobs checks that a Pending job
// stays Pending even though its agent's heartbeat is an hour old. Pending /
// AwaitingCSR / AwaitingApproval jobs are out of scope for this reaper —
// they're handled by ReapTimedOutJobs (I-003) or ClaimPendingJobs.
func TestReapJobsWithOfflineAgents_SkipsNonRunningJobs(t *testing.T) {
	svc, repo, _ := newOfflineReaperService(t)
	repo.Agents["agt-stale"] = mkAgentWithHeartbeat("agt-stale", 1*time.Hour)

	// Start from the Running fixture and downgrade it to Pending.
	pending := mkRunningJob("j-pending", "agt-stale")
	pending.Status = domain.JobStatusPending
	repo.Jobs["j-pending"] = pending

	err := svc.ReapJobsWithOfflineAgents(context.Background(), 1*time.Minute)
	if err != nil {
		t.Fatalf("reaper returned error: %v", err)
	}

	if status := repo.Jobs["j-pending"].Status; status != domain.JobStatusPending {
		t.Errorf("Pending job status = %s, want Pending (out of scope for offline-agent reaper)", status)
	}
}
// TestReapJobsWithOfflineAgents_RejectsNonPositiveTTL checks that the reaper
// returns an error when called with a zero or negative TTL.
func TestReapJobsWithOfflineAgents_RejectsNonPositiveTTL(t *testing.T) {
	svc, _, _ := newOfflineReaperService(t)
	ctx := context.Background()

	badTTLs := []struct {
		ttl     time.Duration
		failMsg string
	}{
		{0, "expected error for zero TTL — fail-loud guard against misconfig"},
		{-time.Hour, "expected error for negative TTL — fail-loud guard against misconfig"},
	}

	for _, tc := range badTTLs {
		if svc.ReapJobsWithOfflineAgents(ctx, tc.ttl) == nil {
			t.Error(tc.failMsg)
		}
	}
}
// TestReapJobsWithOfflineAgents_PropagatesRepoError sets
// ListOfflineAgentJobsErr on the mock repo and checks that the reaper returns
// an error whose text still contains the injected message.
func TestReapJobsWithOfflineAgents_PropagatesRepoError(t *testing.T) {
	const cause = "simulated db down"

	svc, repo, _ := newOfflineReaperService(t)
	repo.ListOfflineAgentJobsErr = errors.New(cause)

	err := svc.ReapJobsWithOfflineAgents(context.Background(), 5*time.Minute)
	switch {
	case err == nil:
		t.Fatal("expected error to propagate from repo")
	case !strings.Contains(err.Error(), cause):
		t.Errorf("expected wrapped repo error, got: %v", err)
	}
}
// TestReapJobsWithOfflineAgents_RecordsAuditEvent reaps one job whose agent
// heartbeat is 30 minutes old (TTL 5 minutes) and checks that the audit mock
// holds a "job_offline_agent_reap" event with actor "system" bound to
// resource job/j-stale.
func TestReapJobsWithOfflineAgents_RecordsAuditEvent(t *testing.T) {
	svc, repo, audit := newOfflineReaperService(t)
	repo.Agents["agt-stale"] = mkAgentWithHeartbeat("agt-stale", 30*time.Minute)
	repo.Jobs["j-stale"] = mkRunningJob("j-stale", "agt-stale")

	err := svc.ReapJobsWithOfflineAgents(context.Background(), 5*time.Minute)
	if err != nil {
		t.Fatalf("reaper: %v", err)
	}

	// Copy the recorded events while holding the mock's mutex, then inspect
	// the copy without the lock.
	audit.mu.Lock()
	snapshot := make([]*domain.AuditEvent, len(audit.Events))
	copy(snapshot, audit.Events)
	audit.mu.Unlock()

	// Pick the first event carrying the reap action.
	var reapEvent *domain.AuditEvent
	for _, ev := range snapshot {
		if ev.Action == "job_offline_agent_reap" {
			reapEvent = ev
			break
		}
	}
	if reapEvent == nil {
		t.Fatal("expected job_offline_agent_reap audit event, got none")
	}

	if reapEvent.Actor != "system" {
		t.Errorf("audit Actor = %q, want system", reapEvent.Actor)
	}
	if !(reapEvent.ResourceType == "job" && reapEvent.ResourceID == "j-stale") {
		t.Errorf("audit resource binding wrong: %s/%s", reapEvent.ResourceType, reapEvent.ResourceID)
	}
}