Skip to content

Commit ff4325f

Browse files
committed
fix: prevent goroutine leak on manager shutdown timeout
1 parent dcfb18d commit ff4325f

File tree

2 files changed

+33
-2
lines changed

2 files changed

+33
-2
lines changed

pkg/manager/manager.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ func New(config *rest.Config, options Options) (Manager, error) {
417417
}
418418

419419
errChan := make(chan error, 1)
420-
runnables := newRunnables(options.BaseContext, errChan)
420+
runnables := newRunnables(options.BaseContext, errChan).withLogger(options.Logger)
421421
return &controllerManager{
422422
stopProcedureEngaged: ptr.To(int64(0)),
423423
cluster: cluster,

pkg/manager/runnable_group.go

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"errors"
66
"sync"
77

8+
"github.com/go-logr/logr"
89
"sigs.k8s.io/controller-runtime/pkg/webhook"
910
)
1011

@@ -46,6 +47,16 @@ func newRunnables(baseContext BaseContextFunc, errChan chan error) *runnables {
4647
}
4748
}
4849

50+
// withLogger sets the logger for all runnable groups.
51+
func (r *runnables) withLogger(logger logr.Logger) *runnables {
52+
r.HTTPServers.withLogger(logger)
53+
r.Webhooks.withLogger(logger)
54+
r.Caches.withLogger(logger)
55+
r.LeaderElection.withLogger(logger)
56+
r.Others.withLogger(logger)
57+
return r
58+
}
59+
4960
// Add adds a runnable to closest group of runnable that they belong to.
5061
//
5162
// Add should be able to be called before and after Start, but not after StopAndWait.
@@ -105,6 +116,9 @@ type runnableGroup struct {
105116
// wg is an internal sync.WaitGroup that allows us to properly stop
106117
// and wait for all the runnables to finish before returning.
107118
wg *sync.WaitGroup
119+
120+
// logger is used for logging when errors are dropped during shutdown
121+
logger logr.Logger
108122
}
109123

110124
func newRunnableGroup(baseContext BaseContextFunc, errChan chan error) *runnableGroup {
@@ -113,12 +127,19 @@ func newRunnableGroup(baseContext BaseContextFunc, errChan chan error) *runnable
113127
errChan: errChan,
114128
ch: make(chan *readyRunnable),
115129
wg: new(sync.WaitGroup),
130+
logger: logr.Discard(), // Default to no-op logger
116131
}
117132

118133
r.ctx, r.cancel = context.WithCancel(baseContext())
119134
return r
120135
}
121136

137+
// withLogger sets the logger for this runnable group.
138+
func (r *runnableGroup) withLogger(logger logr.Logger) *runnableGroup {
139+
r.logger = logger
140+
return r
141+
}
142+
122143
// Started returns true if the group has started.
123144
func (r *runnableGroup) Started() bool {
124145
r.start.Lock()
@@ -224,7 +245,17 @@ func (r *runnableGroup) reconcile() {
224245

225246
// Start the runnable.
226247
if err := rn.Start(r.ctx); err != nil {
227-
r.errChan <- err
248+
// Send error with context awareness to prevent blocking during shutdown
249+
select {
250+
case r.errChan <- err:
251+
// Error sent successfully
252+
case <-r.ctx.Done():
253+
// Context cancelled (shutdown), drop error to prevent blocking forever
254+
// This prevents goroutine leaks when error drain go routine has exited after timeout
255+
if !errors.Is(err, context.Canceled) { // don't log context.Canceled errors as they are expected during shutdown
256+
r.logger.Info("error dropped during shutdown to prevent goroutine leak", "error", err)
257+
}
258+
}
228259
}
229260
}(runnable)
230261
}

0 commit comments

Comments
 (0)