diff --git a/cmd/server/main.go b/cmd/server/main.go index 92b155a..00573f2 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -253,9 +253,15 @@ func main() { Name: "Network Scanner (Server-Side)", Status: domain.AgentStatusOnline, } - if err := agentRepo.Create(context.Background(), sentinelAgent); err != nil { - // Ignore duplicate key errors (agent already exists) - logger.Debug("sentinel agent creation", "status", "exists or created", "id", service.SentinelAgentID) + // M-6: use CreateIfNotExists so duplicate rows on restart/upgrade are + // idempotent without swallowing unrelated DB failures (CWE-662). + created, err := agentRepo.CreateIfNotExists(context.Background(), sentinelAgent) + if err != nil { + logger.Error("sentinel agent creation failed", "id", service.SentinelAgentID, "error", err) + } else if created { + logger.Info("sentinel agent created", "id", service.SentinelAgentID) + } else { + logger.Debug("sentinel agent already exists", "id", service.SentinelAgentID) } } @@ -274,8 +280,14 @@ func main() { Name: "AWS Secrets Manager Discovery", Status: domain.AgentStatusOnline, } - if err := agentRepo.Create(context.Background(), sentinelAWS); err != nil { - logger.Debug("sentinel agent creation", "status", "exists or created", "id", service.SentinelAWSSecretsMgr) + // M-6: idempotent create (CWE-662). + created, err := agentRepo.CreateIfNotExists(context.Background(), sentinelAWS) + if err != nil { + logger.Error("sentinel agent creation failed", "id", service.SentinelAWSSecretsMgr, "error", err) + } else if created { + logger.Info("sentinel agent created", "id", service.SentinelAWSSecretsMgr) + } else { + logger.Debug("sentinel agent already exists", "id", service.SentinelAWSSecretsMgr) } } @@ -293,8 +305,14 @@ func main() { Name: "Azure Key Vault Discovery", Status: domain.AgentStatusOnline, } - if err := agentRepo.Create(context.Background(), sentinelAzure); err != nil { - logger.Debug("sentinel agent creation", "status", "exists or created", "id", service.SentinelAzureKeyVault) + // M-6: idempotent create (CWE-662). + created, err := agentRepo.CreateIfNotExists(context.Background(), sentinelAzure) + if err != nil { + logger.Error("sentinel agent creation failed", "id", service.SentinelAzureKeyVault, "error", err) + } else if created { + logger.Info("sentinel agent created", "id", service.SentinelAzureKeyVault) + } else { + logger.Debug("sentinel agent already exists", "id", service.SentinelAzureKeyVault) } } @@ -307,8 +325,14 @@ func main() { Name: "GCP Secret Manager Discovery", Status: domain.AgentStatusOnline, } - if err := agentRepo.Create(context.Background(), sentinelGCP); err != nil { - logger.Debug("sentinel agent creation", "status", "exists or created", "id", service.SentinelGCPSecretMgr) + // M-6: idempotent create (CWE-662). + created, err := agentRepo.CreateIfNotExists(context.Background(), sentinelGCP) + if err != nil { + logger.Error("sentinel agent creation failed", "id", service.SentinelGCPSecretMgr, "error", err) + } else if created { + logger.Info("sentinel agent created", "id", service.SentinelGCPSecretMgr) + } else { + logger.Debug("sentinel agent already exists", "id", service.SentinelGCPSecretMgr) } } diff --git a/internal/integration/lifecycle_test.go b/internal/integration/lifecycle_test.go index 9e9b210..b6b7137 100644 --- a/internal/integration/lifecycle_test.go +++ b/internal/integration/lifecycle_test.go @@ -772,6 +772,14 @@ func (m *mockAgentRepository) Create(ctx context.Context, agent *domain.Agent) e return nil } +func (m *mockAgentRepository) CreateIfNotExists(ctx context.Context, agent *domain.Agent) (bool, error) { + if _, exists := m.agents[agent.ID]; exists { + return false, nil + } + m.agents[agent.ID] = agent + return true, nil +} + func (m *mockAgentRepository) Update(ctx context.Context, agent *domain.Agent) error { m.agents[agent.ID] = agent return nil diff --git a/internal/repository/interfaces.go b/internal/repository/interfaces.go index 26ba1cb..ff23b5f 100644 --- a/internal/repository/interfaces.go +++ b/internal/repository/interfaces.go @@ -90,8 +90,18 @@ type AgentRepository interface { List(ctx context.Context) ([]*domain.Agent, error) // Get retrieves an agent by ID. Get(ctx context.Context, id string) (*domain.Agent, error) - // Create stores a new agent. + // Create stores a new agent. Callers that want duplicate-key errors surfaced + // (e.g. real-agent registration) must use this method; sentinel/bootstrap + // paths that expect the row to already exist on restart should call + // CreateIfNotExists instead (M-6, CWE-662). Create(ctx context.Context, agent *domain.Agent) error + // CreateIfNotExists creates an agent only if the ID doesn't already exist + // (INSERT ... ON CONFLICT (id) DO NOTHING). Returns true if the row was + // newly inserted, false if a row with the same ID already existed. Used + // by the sentinel-agent bootstrap path in cmd/server/main.go so restarts + // and upgrades are idempotent without swallowing unrelated database + // failures (M-6, CWE-662). + CreateIfNotExists(ctx context.Context, agent *domain.Agent) (bool, error) // Update modifies an existing agent. Update(ctx context.Context, agent *domain.Agent) error // Delete removes an agent. diff --git a/internal/repository/postgres/agent.go b/internal/repository/postgres/agent.go index c945a28..0b406fc 100644 --- a/internal/repository/postgres/agent.go +++ b/internal/repository/postgres/agent.go @@ -70,7 +70,9 @@ func (r *AgentRepository) Get(ctx context.Context, id string) (*domain.Agent, er return agent, nil } -// Create stores a new agent +// Create stores a new agent. Duplicate-key errors surface to the caller — +// real-agent registration paths rely on this to detect collisions. Use +// CreateIfNotExists for sentinel/bootstrap paths where re-inserts are expected. func (r *AgentRepository) Create(ctx context.Context, agent *domain.Agent) error { if agent.ID == "" { agent.ID = uuid.New().String() @@ -92,6 +94,44 @@ func (r *AgentRepository) Create(ctx context.Context, agent *domain.Agent) error return nil } +// CreateIfNotExists creates an agent only if the ID doesn't already exist. +// Used for sentinel agents (server-scanner, cloud-aws-sm, cloud-azure-kv, +// cloud-gcp-sm) on first boot AND on every subsequent restart/upgrade — the +// pre-M-6 code used plain INSERT, swallowed the duplicate-key error, and so +// silently swallowed every other database failure too (CWE-662 / +// CWE-209-adjacent). ON CONFLICT (id) DO NOTHING + RETURNING id + +// sql.ErrNoRows distinguishes "row already existed" (created=false, err=nil) +// from genuine errors (connectivity, permission, constraint violations +// other than the id primary key) which still surface. Returns true if the +// row was newly inserted, false if a row with the same ID already existed. +func (r *AgentRepository) CreateIfNotExists(ctx context.Context, agent *domain.Agent) (bool, error) { + if agent.ID == "" { + agent.ID = uuid.New().String() + } + + var id string + err := r.db.QueryRowContext(ctx, ` + INSERT INTO agents (id, name, hostname, status, last_heartbeat_at, registered_at, api_key_hash, + os, architecture, ip_address, version) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) + ON CONFLICT (id) DO NOTHING + RETURNING id + `, agent.ID, agent.Name, agent.Hostname, agent.Status, agent.LastHeartbeatAt, + agent.RegisteredAt, agent.APIKeyHash, + agent.OS, agent.Architecture, agent.IPAddress, agent.Version).Scan(&id) + + if err != nil { + if err == sql.ErrNoRows { + // ON CONFLICT DO NOTHING — a row with this ID already existed. + return false, nil + } + return false, fmt.Errorf("failed to create agent: %w", err) + } + + agent.ID = id + return true, nil +} + // Update modifies an existing agent func (r *AgentRepository) Update(ctx context.Context, agent *domain.Agent) error { result, err := r.db.ExecContext(ctx, ` diff --git a/internal/repository/postgres/repo_test.go b/internal/repository/postgres/repo_test.go index 20d3dc8..7e07fe1 100644 --- a/internal/repository/postgres/repo_test.go +++ b/internal/repository/postgres/repo_test.go @@ -457,6 +457,193 @@ func TestAgentRepository_Delete_NotFound(t *testing.T) { } } +// TestAgentRepository_CreateIfNotExists_FirstInsert verifies that a brand-new +// sentinel agent row is inserted and the helper reports created=true (M-6). +func TestAgentRepository_CreateIfNotExists_FirstInsert(t *testing.T) { + tdb := getTestDB(t) + db := tdb.freshSchema(t) + repo := postgres.NewAgentRepository(db) + ctx := context.Background() + + now := time.Now().Truncate(time.Microsecond) + agent := &domain.Agent{ + ID: "server-scanner", + Name: "Network Scanner (Server-Side)", + Status: domain.AgentStatusOnline, + RegisteredAt: now, + } + + created, err := repo.CreateIfNotExists(ctx, agent) + if err != nil { + t.Fatalf("CreateIfNotExists failed: %v", err) + } + if !created { + t.Error("created = false on first insert, want true") + } + + got, err := repo.Get(ctx, "server-scanner") + if err != nil { + t.Fatalf("Get failed: %v", err) + } + if got.Name != "Network Scanner (Server-Side)" { + t.Errorf("Name = %q, want %q", got.Name, "Network Scanner (Server-Side)") + } +} + +// TestAgentRepository_CreateIfNotExists_Idempotent verifies that a second +// call with the same ID returns created=false and err=nil without mutating +// the existing row — the core M-6 upgrade/restart scenario (CWE-662). +func TestAgentRepository_CreateIfNotExists_Idempotent(t *testing.T) { + tdb := getTestDB(t) + db := tdb.freshSchema(t) + repo := postgres.NewAgentRepository(db) + ctx := context.Background() + + now := time.Now().Truncate(time.Microsecond) + first := &domain.Agent{ + ID: "cloud-aws-sm", + Name: "AWS Secrets Manager Discovery", + Status: domain.AgentStatusOnline, + RegisteredAt: now, + } + created, err := repo.CreateIfNotExists(ctx, first) + if err != nil { + t.Fatalf("first CreateIfNotExists failed: %v", err) + } + if !created { + t.Fatal("first created = false, want true") + } + + // Second call with the same ID but a different name must be a no-op. + second := &domain.Agent{ + ID: "cloud-aws-sm", + Name: "Overwritten Name Should Not Persist", + Status: domain.AgentStatusOffline, + RegisteredAt: now.Add(time.Hour), + } + created, err = repo.CreateIfNotExists(ctx, second) + if err != nil { + t.Fatalf("second CreateIfNotExists failed: %v", err) + } + if created { + t.Error("second created = true, want false (row already existed)") + } + + // Row must still reflect the original insert. + got, err := repo.Get(ctx, "cloud-aws-sm") + if err != nil { + t.Fatalf("Get failed: %v", err) + } + if got.Name != "AWS Secrets Manager Discovery" { + t.Errorf("Name = %q, want %q (ON CONFLICT DO NOTHING must preserve original row)", got.Name, "AWS Secrets Manager Discovery") + } + if got.Status != domain.AgentStatusOnline { + t.Errorf("Status = %q, want %q", got.Status, domain.AgentStatusOnline) + } +} + +// TestAgentRepository_CreateIfNotExists_ConcurrentRace fires N concurrent +// inserts for the same sentinel ID. Exactly one goroutine must see +// created=true; every other must see created=false and err=nil. No panics, +// no duplicate rows, no swallowed errors. This is the scenario that the +// pre-M-6 plain-INSERT path masked with a blanket error log. +func TestAgentRepository_CreateIfNotExists_ConcurrentRace(t *testing.T) { + tdb := getTestDB(t) + db := tdb.freshSchema(t) + repo := postgres.NewAgentRepository(db) + ctx := context.Background() + + const N = 16 + now := time.Now().Truncate(time.Microsecond) + + var ( + wg sync.WaitGroup + createdCount int64 + errorCount int64 + ) + wg.Add(N) + for i := 0; i < N; i++ { + go func() { + defer wg.Done() + agent := &domain.Agent{ + ID: "cloud-gcp-sm", + Name: "GCP Secret Manager Discovery", + Status: domain.AgentStatusOnline, + RegisteredAt: now, + } + created, err := repo.CreateIfNotExists(ctx, agent) + if err != nil { + atomic.AddInt64(&errorCount, 1) + t.Errorf("CreateIfNotExists returned error: %v", err) + return + } + if created { + atomic.AddInt64(&createdCount, 1) + } + }() + } + wg.Wait() + + if errorCount != 0 { + t.Fatalf("errorCount = %d, want 0", errorCount) + } + if createdCount != 1 { + t.Errorf("createdCount = %d, want exactly 1 (only one goroutine may win the insert)", createdCount) + } + + // Exactly one row must exist. + agents, err := repo.List(ctx) + if err != nil { + t.Fatalf("List failed: %v", err) + } + count := 0 + for _, a := range agents { + if a.ID == "cloud-gcp-sm" { + count++ + } + } + if count != 1 { + t.Errorf("row count for cloud-gcp-sm = %d, want 1", count) + } +} + +// TestAgentRepository_CreateIfNotExists_GenericErrorSurfaces verifies that +// failures other than the primary-key duplicate (the only collision +// ON CONFLICT (id) absorbs) propagate to the caller instead of being +// swallowed. This is the security property that M-6 restores: the +// pre-fix plain-INSERT path logged every error at Debug level, so a +// connectivity or permission failure would vanish into the log without +// the server surfacing a problem on startup (CWE-662 / CWE-209-adjacent). +// +// Uses a pre-cancelled context to force QueryRowContext to fail with +// context.Canceled — a non-duplicate error class that must surface. +// Does NOT close the shared sql.DB (that would break sibling tests). +func TestAgentRepository_CreateIfNotExists_GenericErrorSurfaces(t *testing.T) { + tdb := getTestDB(t) + db := tdb.freshSchema(t) + repo := postgres.NewAgentRepository(db) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() // pre-cancel so the driver round-trip fails immediately. + + agent := &domain.Agent{ + ID: "server-scanner", + Name: "Network Scanner (Server-Side)", + Status: domain.AgentStatusOnline, + RegisteredAt: time.Now(), + } + created, err := repo.CreateIfNotExists(ctx, agent) + if err == nil { + t.Fatal("expected error on cancelled context, got nil (error would have been swallowed pre-M-6)") + } + if created { + t.Error("created = true on failure, want false") + } + if err == sql.ErrNoRows { + t.Error("got sql.ErrNoRows, want a real connection/context error (ErrNoRows is the duplicate-row sentinel)") + } +} + // ============================================================ // Issuer Repository Tests // ============================================================ diff --git a/internal/service/testutil_test.go b/internal/service/testutil_test.go index 152405c..b913fcc 100644 --- a/internal/service/testutil_test.go +++ b/internal/service/testutil_test.go @@ -602,6 +602,19 @@ func (m *mockAgentRepo) Create(ctx context.Context, agent *domain.Agent) error { return nil } +func (m *mockAgentRepo) CreateIfNotExists(ctx context.Context, agent *domain.Agent) (bool, error) { + m.mu.Lock() + defer m.mu.Unlock() + if m.CreateErr != nil { + return false, m.CreateErr + } + if _, exists := m.Agents[agent.ID]; exists { + return false, nil + } + m.Agents[agent.ID] = agent + return true, nil +} + func (m *mockAgentRepo) Update(ctx context.Context, agent *domain.Agent) error { m.mu.Lock() defer m.mu.Unlock()