mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-07 18:11:32 +00:00
Merge branch 'fix/m6-sentinel-idempotent-create'
Resolves M-6 (Medium): swallowed sentinel agent INSERT errors. CWE-662 / CWE-209-adjacent. Shape A: CreateIfNotExists helper + 4 sentinel call sites.
This commit is contained in:
+33
-9
@@ -253,9 +253,15 @@ func main() {
|
||||
Name: "Network Scanner (Server-Side)",
|
||||
Status: domain.AgentStatusOnline,
|
||||
}
|
||||
if err := agentRepo.Create(context.Background(), sentinelAgent); err != nil {
|
||||
// Ignore duplicate key errors (agent already exists)
|
||||
logger.Debug("sentinel agent creation", "status", "exists or created", "id", service.SentinelAgentID)
|
||||
// M-6: use CreateIfNotExists so duplicate rows on restart/upgrade are
|
||||
// idempotent without swallowing unrelated DB failures (CWE-662).
|
||||
created, err := agentRepo.CreateIfNotExists(context.Background(), sentinelAgent)
|
||||
if err != nil {
|
||||
logger.Error("sentinel agent creation failed", "id", service.SentinelAgentID, "error", err)
|
||||
} else if created {
|
||||
logger.Info("sentinel agent created", "id", service.SentinelAgentID)
|
||||
} else {
|
||||
logger.Debug("sentinel agent already exists", "id", service.SentinelAgentID)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -274,8 +280,14 @@ func main() {
|
||||
Name: "AWS Secrets Manager Discovery",
|
||||
Status: domain.AgentStatusOnline,
|
||||
}
|
||||
if err := agentRepo.Create(context.Background(), sentinelAWS); err != nil {
|
||||
logger.Debug("sentinel agent creation", "status", "exists or created", "id", service.SentinelAWSSecretsMgr)
|
||||
// M-6: idempotent create (CWE-662).
|
||||
created, err := agentRepo.CreateIfNotExists(context.Background(), sentinelAWS)
|
||||
if err != nil {
|
||||
logger.Error("sentinel agent creation failed", "id", service.SentinelAWSSecretsMgr, "error", err)
|
||||
} else if created {
|
||||
logger.Info("sentinel agent created", "id", service.SentinelAWSSecretsMgr)
|
||||
} else {
|
||||
logger.Debug("sentinel agent already exists", "id", service.SentinelAWSSecretsMgr)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -293,8 +305,14 @@ func main() {
|
||||
Name: "Azure Key Vault Discovery",
|
||||
Status: domain.AgentStatusOnline,
|
||||
}
|
||||
if err := agentRepo.Create(context.Background(), sentinelAzure); err != nil {
|
||||
logger.Debug("sentinel agent creation", "status", "exists or created", "id", service.SentinelAzureKeyVault)
|
||||
// M-6: idempotent create (CWE-662).
|
||||
created, err := agentRepo.CreateIfNotExists(context.Background(), sentinelAzure)
|
||||
if err != nil {
|
||||
logger.Error("sentinel agent creation failed", "id", service.SentinelAzureKeyVault, "error", err)
|
||||
} else if created {
|
||||
logger.Info("sentinel agent created", "id", service.SentinelAzureKeyVault)
|
||||
} else {
|
||||
logger.Debug("sentinel agent already exists", "id", service.SentinelAzureKeyVault)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -307,8 +325,14 @@ func main() {
|
||||
Name: "GCP Secret Manager Discovery",
|
||||
Status: domain.AgentStatusOnline,
|
||||
}
|
||||
if err := agentRepo.Create(context.Background(), sentinelGCP); err != nil {
|
||||
logger.Debug("sentinel agent creation", "status", "exists or created", "id", service.SentinelGCPSecretMgr)
|
||||
// M-6: idempotent create (CWE-662).
|
||||
created, err := agentRepo.CreateIfNotExists(context.Background(), sentinelGCP)
|
||||
if err != nil {
|
||||
logger.Error("sentinel agent creation failed", "id", service.SentinelGCPSecretMgr, "error", err)
|
||||
} else if created {
|
||||
logger.Info("sentinel agent created", "id", service.SentinelGCPSecretMgr)
|
||||
} else {
|
||||
logger.Debug("sentinel agent already exists", "id", service.SentinelGCPSecretMgr)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -772,6 +772,14 @@ func (m *mockAgentRepository) Create(ctx context.Context, agent *domain.Agent) e
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockAgentRepository) CreateIfNotExists(ctx context.Context, agent *domain.Agent) (bool, error) {
|
||||
if _, exists := m.agents[agent.ID]; exists {
|
||||
return false, nil
|
||||
}
|
||||
m.agents[agent.ID] = agent
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (m *mockAgentRepository) Update(ctx context.Context, agent *domain.Agent) error {
|
||||
m.agents[agent.ID] = agent
|
||||
return nil
|
||||
|
||||
@@ -90,8 +90,18 @@ type AgentRepository interface {
|
||||
List(ctx context.Context) ([]*domain.Agent, error)
|
||||
// Get retrieves an agent by ID.
|
||||
Get(ctx context.Context, id string) (*domain.Agent, error)
|
||||
// Create stores a new agent.
|
||||
// Create stores a new agent. Callers that want duplicate-key errors surfaced
|
||||
// (e.g. real-agent registration) must use this method; sentinel/bootstrap
|
||||
// paths that expect the row to already exist on restart should call
|
||||
// CreateIfNotExists instead (M-6, CWE-662).
|
||||
Create(ctx context.Context, agent *domain.Agent) error
|
||||
// CreateIfNotExists creates an agent only if the ID doesn't already exist
|
||||
// (INSERT ... ON CONFLICT (id) DO NOTHING). Returns true if the row was
|
||||
// newly inserted, false if a row with the same ID already existed. Used
|
||||
// by the sentinel-agent bootstrap path in cmd/server/main.go so restarts
|
||||
// and upgrades are idempotent without swallowing unrelated database
|
||||
// failures (M-6, CWE-662).
|
||||
CreateIfNotExists(ctx context.Context, agent *domain.Agent) (bool, error)
|
||||
// Update modifies an existing agent.
|
||||
Update(ctx context.Context, agent *domain.Agent) error
|
||||
// Delete removes an agent.
|
||||
|
||||
@@ -70,7 +70,9 @@ func (r *AgentRepository) Get(ctx context.Context, id string) (*domain.Agent, er
|
||||
return agent, nil
|
||||
}
|
||||
|
||||
// Create stores a new agent
|
||||
// Create stores a new agent. Duplicate-key errors surface to the caller —
|
||||
// real-agent registration paths rely on this to detect collisions. Use
|
||||
// CreateIfNotExists for sentinel/bootstrap paths where re-inserts are expected.
|
||||
func (r *AgentRepository) Create(ctx context.Context, agent *domain.Agent) error {
|
||||
if agent.ID == "" {
|
||||
agent.ID = uuid.New().String()
|
||||
@@ -92,6 +94,44 @@ func (r *AgentRepository) Create(ctx context.Context, agent *domain.Agent) error
|
||||
return nil
|
||||
}
|
||||
|
||||
// CreateIfNotExists creates an agent only if the ID doesn't already exist.
|
||||
// Used for sentinel agents (server-scanner, cloud-aws-sm, cloud-azure-kv,
|
||||
// cloud-gcp-sm) on first boot AND on every subsequent restart/upgrade — the
|
||||
// pre-M-6 code used plain INSERT, swallowed the duplicate-key error, and so
|
||||
// silently swallowed every other database failure too (CWE-662 /
|
||||
// CWE-209-adjacent). ON CONFLICT (id) DO NOTHING + RETURNING id +
|
||||
// sql.ErrNoRows distinguishes "row already existed" (created=false, err=nil)
|
||||
// from genuine errors (connectivity, permission, constraint violations
|
||||
// other than the id primary key) which still surface. Returns true if the
|
||||
// row was newly inserted, false if a row with the same ID already existed.
|
||||
func (r *AgentRepository) CreateIfNotExists(ctx context.Context, agent *domain.Agent) (bool, error) {
|
||||
if agent.ID == "" {
|
||||
agent.ID = uuid.New().String()
|
||||
}
|
||||
|
||||
var id string
|
||||
err := r.db.QueryRowContext(ctx, `
|
||||
INSERT INTO agents (id, name, hostname, status, last_heartbeat_at, registered_at, api_key_hash,
|
||||
os, architecture, ip_address, version)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
|
||||
ON CONFLICT (id) DO NOTHING
|
||||
RETURNING id
|
||||
`, agent.ID, agent.Name, agent.Hostname, agent.Status, agent.LastHeartbeatAt,
|
||||
agent.RegisteredAt, agent.APIKeyHash,
|
||||
agent.OS, agent.Architecture, agent.IPAddress, agent.Version).Scan(&id)
|
||||
|
||||
if err != nil {
|
||||
if err == sql.ErrNoRows {
|
||||
// ON CONFLICT DO NOTHING — a row with this ID already existed.
|
||||
return false, nil
|
||||
}
|
||||
return false, fmt.Errorf("failed to create agent: %w", err)
|
||||
}
|
||||
|
||||
agent.ID = id
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// Update modifies an existing agent
|
||||
func (r *AgentRepository) Update(ctx context.Context, agent *domain.Agent) error {
|
||||
result, err := r.db.ExecContext(ctx, `
|
||||
|
||||
@@ -457,6 +457,193 @@ func TestAgentRepository_Delete_NotFound(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentRepository_CreateIfNotExists_FirstInsert verifies that a brand-new
|
||||
// sentinel agent row is inserted and the helper reports created=true (M-6).
|
||||
func TestAgentRepository_CreateIfNotExists_FirstInsert(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewAgentRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
now := time.Now().Truncate(time.Microsecond)
|
||||
agent := &domain.Agent{
|
||||
ID: "server-scanner",
|
||||
Name: "Network Scanner (Server-Side)",
|
||||
Status: domain.AgentStatusOnline,
|
||||
RegisteredAt: now,
|
||||
}
|
||||
|
||||
created, err := repo.CreateIfNotExists(ctx, agent)
|
||||
if err != nil {
|
||||
t.Fatalf("CreateIfNotExists failed: %v", err)
|
||||
}
|
||||
if !created {
|
||||
t.Error("created = false on first insert, want true")
|
||||
}
|
||||
|
||||
got, err := repo.Get(ctx, "server-scanner")
|
||||
if err != nil {
|
||||
t.Fatalf("Get failed: %v", err)
|
||||
}
|
||||
if got.Name != "Network Scanner (Server-Side)" {
|
||||
t.Errorf("Name = %q, want %q", got.Name, "Network Scanner (Server-Side)")
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentRepository_CreateIfNotExists_Idempotent verifies that a second
|
||||
// call with the same ID returns created=false and err=nil without mutating
|
||||
// the existing row — the core M-6 upgrade/restart scenario (CWE-662).
|
||||
func TestAgentRepository_CreateIfNotExists_Idempotent(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewAgentRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
now := time.Now().Truncate(time.Microsecond)
|
||||
first := &domain.Agent{
|
||||
ID: "cloud-aws-sm",
|
||||
Name: "AWS Secrets Manager Discovery",
|
||||
Status: domain.AgentStatusOnline,
|
||||
RegisteredAt: now,
|
||||
}
|
||||
created, err := repo.CreateIfNotExists(ctx, first)
|
||||
if err != nil {
|
||||
t.Fatalf("first CreateIfNotExists failed: %v", err)
|
||||
}
|
||||
if !created {
|
||||
t.Fatal("first created = false, want true")
|
||||
}
|
||||
|
||||
// Second call with the same ID but a different name must be a no-op.
|
||||
second := &domain.Agent{
|
||||
ID: "cloud-aws-sm",
|
||||
Name: "Overwritten Name Should Not Persist",
|
||||
Status: domain.AgentStatusOffline,
|
||||
RegisteredAt: now.Add(time.Hour),
|
||||
}
|
||||
created, err = repo.CreateIfNotExists(ctx, second)
|
||||
if err != nil {
|
||||
t.Fatalf("second CreateIfNotExists failed: %v", err)
|
||||
}
|
||||
if created {
|
||||
t.Error("second created = true, want false (row already existed)")
|
||||
}
|
||||
|
||||
// Row must still reflect the original insert.
|
||||
got, err := repo.Get(ctx, "cloud-aws-sm")
|
||||
if err != nil {
|
||||
t.Fatalf("Get failed: %v", err)
|
||||
}
|
||||
if got.Name != "AWS Secrets Manager Discovery" {
|
||||
t.Errorf("Name = %q, want %q (ON CONFLICT DO NOTHING must preserve original row)", got.Name, "AWS Secrets Manager Discovery")
|
||||
}
|
||||
if got.Status != domain.AgentStatusOnline {
|
||||
t.Errorf("Status = %q, want %q", got.Status, domain.AgentStatusOnline)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentRepository_CreateIfNotExists_ConcurrentRace fires N concurrent
|
||||
// inserts for the same sentinel ID. Exactly one goroutine must see
|
||||
// created=true; every other must see created=false and err=nil. No panics,
|
||||
// no duplicate rows, no swallowed errors. This is the scenario that the
|
||||
// pre-M-6 plain-INSERT path masked with a blanket error log.
|
||||
func TestAgentRepository_CreateIfNotExists_ConcurrentRace(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewAgentRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
const N = 16
|
||||
now := time.Now().Truncate(time.Microsecond)
|
||||
|
||||
var (
|
||||
wg sync.WaitGroup
|
||||
createdCount int64
|
||||
errorCount int64
|
||||
)
|
||||
wg.Add(N)
|
||||
for i := 0; i < N; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
agent := &domain.Agent{
|
||||
ID: "cloud-gcp-sm",
|
||||
Name: "GCP Secret Manager Discovery",
|
||||
Status: domain.AgentStatusOnline,
|
||||
RegisteredAt: now,
|
||||
}
|
||||
created, err := repo.CreateIfNotExists(ctx, agent)
|
||||
if err != nil {
|
||||
atomic.AddInt64(&errorCount, 1)
|
||||
t.Errorf("CreateIfNotExists returned error: %v", err)
|
||||
return
|
||||
}
|
||||
if created {
|
||||
atomic.AddInt64(&createdCount, 1)
|
||||
}
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
if errorCount != 0 {
|
||||
t.Fatalf("errorCount = %d, want 0", errorCount)
|
||||
}
|
||||
if createdCount != 1 {
|
||||
t.Errorf("createdCount = %d, want exactly 1 (only one goroutine may win the insert)", createdCount)
|
||||
}
|
||||
|
||||
// Exactly one row must exist.
|
||||
agents, err := repo.List(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("List failed: %v", err)
|
||||
}
|
||||
count := 0
|
||||
for _, a := range agents {
|
||||
if a.ID == "cloud-gcp-sm" {
|
||||
count++
|
||||
}
|
||||
}
|
||||
if count != 1 {
|
||||
t.Errorf("row count for cloud-gcp-sm = %d, want 1", count)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentRepository_CreateIfNotExists_GenericErrorSurfaces verifies that
|
||||
// failures other than the primary-key duplicate (the only collision
|
||||
// ON CONFLICT (id) absorbs) propagate to the caller instead of being
|
||||
// swallowed. This is the security property that M-6 restores: the
|
||||
// pre-fix plain-INSERT path logged every error at Debug level, so a
|
||||
// connectivity or permission failure would vanish into the log without
|
||||
// the server surfacing a problem on startup (CWE-662 / CWE-209-adjacent).
|
||||
//
|
||||
// Uses a pre-cancelled context to force QueryRowContext to fail with
|
||||
// context.Canceled — a non-duplicate error class that must surface.
|
||||
// Does NOT close the shared sql.DB (that would break sibling tests).
|
||||
func TestAgentRepository_CreateIfNotExists_GenericErrorSurfaces(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewAgentRepository(db)
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // pre-cancel so the driver round-trip fails immediately.
|
||||
|
||||
agent := &domain.Agent{
|
||||
ID: "server-scanner",
|
||||
Name: "Network Scanner (Server-Side)",
|
||||
Status: domain.AgentStatusOnline,
|
||||
RegisteredAt: time.Now(),
|
||||
}
|
||||
created, err := repo.CreateIfNotExists(ctx, agent)
|
||||
if err == nil {
|
||||
t.Fatal("expected error on cancelled context, got nil (error would have been swallowed pre-M-6)")
|
||||
}
|
||||
if created {
|
||||
t.Error("created = true on failure, want false")
|
||||
}
|
||||
if err == sql.ErrNoRows {
|
||||
t.Error("got sql.ErrNoRows, want a real connection/context error (ErrNoRows is the duplicate-row sentinel)")
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// Issuer Repository Tests
|
||||
// ============================================================
|
||||
|
||||
@@ -602,6 +602,19 @@ func (m *mockAgentRepo) Create(ctx context.Context, agent *domain.Agent) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockAgentRepo) CreateIfNotExists(ctx context.Context, agent *domain.Agent) (bool, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if m.CreateErr != nil {
|
||||
return false, m.CreateErr
|
||||
}
|
||||
if _, exists := m.Agents[agent.ID]; exists {
|
||||
return false, nil
|
||||
}
|
||||
m.Agents[agent.ID] = agent
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (m *mockAgentRepo) Update(ctx context.Context, agent *domain.Agent) error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
Reference in New Issue
Block a user