fix: scheduler race — track loop goroutines in WaitGroup

Root cause: WaitForCompletion only waited for work goroutines (wg),
but the 5-6 loop goroutines (renewalCheckLoop, jobProcessorLoop, etc.)
were not tracked. After cancel() + WaitForCompletion(), loop goroutines
could still be alive and accessing scheduler/mock fields when the next
test started, triggering the race detector.

Fix:
- Start() now adds loop goroutines to wg, so WaitForCompletion blocks
  until both work items AND loops have fully exited
- Removed the untracked 100ms timer goroutine that closed startedChan;
  startedChan is now closed immediately after the loops are launched
- Timeout test updated: uses blockCh (ignores context) instead of
  slowDelay (respects context) so it reliably triggers the timeout path

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
shankar0123
2026-03-27 23:31:52 -04:00
parent d27cf3545b
commit 01607f8614
2 changed files with 33 additions and 28 deletions
+17 -13
View File
@@ -126,21 +126,25 @@ func (s *Scheduler) Start(ctx context.Context) <-chan struct{} {
go func() {
s.logger.Info("scheduler starting")
// Signal that the scheduler has started all loops
go func() {
<-time.After(100 * time.Millisecond)
close(startedChan)
}()
// Start all scheduler loops concurrently
go s.renewalCheckLoop(ctx)
go s.jobProcessorLoop(ctx)
go s.agentHealthCheckLoop(ctx)
go s.notificationProcessLoop(ctx)
go s.shortLivedExpiryCheckLoop(ctx)
// Track all loop goroutines in the WaitGroup so WaitForCompletion
// blocks until they've fully exited (prevents test races).
loopCount := 5
if s.networkScanService != nil {
go s.networkScanLoop(ctx)
loopCount = 6
}
s.wg.Add(loopCount)
go func() { defer s.wg.Done(); s.renewalCheckLoop(ctx) }()
go func() { defer s.wg.Done(); s.jobProcessorLoop(ctx) }()
go func() { defer s.wg.Done(); s.agentHealthCheckLoop(ctx) }()
go func() { defer s.wg.Done(); s.notificationProcessLoop(ctx) }()
go func() { defer s.wg.Done(); s.shortLivedExpiryCheckLoop(ctx) }()
if s.networkScanService != nil {
go func() { defer s.wg.Done(); s.networkScanLoop(ctx) }()
}
// Signal that all loops are launched
close(startedChan)
// Wait for context cancellation
<-ctx.Done()