
Commit e1b5f34

runtime: reduce max idle mark workers during periodic GC cycles

This change reduces the maximum number of idle mark workers during
periodic (currently every 2 minutes) GC cycles to 1.

Idle mark workers soak up all available and unused Ps, up to GOMAXPROCS.
While this provides some throughput and latency benefit in general, it
can cause what appear to be massive CPU utilization spikes in otherwise
idle applications. This is mostly an issue for *very* idle applications,
ones idle enough to trigger periodic GC cycles. This spike also tends to
interact poorly with auto-scaling systems, as the system might assume the
load average is very low and suddenly see a massive burst in activity.

The result of this change is not to bring this 100% (of GOMAXPROCS) CPU
utilization spike down to 0%, but rather to min(25% + 1/GOMAXPROCS*100%, 100%).

Idle mark workers also incur a small latency penalty, as they must be
descheduled for other work that might pop up. Luckily the runtime is
pretty good about getting idle mark workers off of Ps, so in general the
latency benefit from shorter GC cycles outweighs this cost. But the cost
is still non-zero and may be more significant in idle applications that
aren't invoking assists and write barriers quite as often.

We can't completely eliminate idle mark workers because they're currently
necessary for GC progress in some circumstances. Namely, they're critical
for progress when all we have is fractional workers. If a fractional
worker meets its quota, and all user goroutines are blocked directly or
indirectly on a GC cycle (via runtime.GOMAXPROCS or runtime.GC), the
program may deadlock without GC workers, since the fractional worker will
go to sleep with nothing to wake it.

Fixes #37116.
For #44163.

Change-Id: Ib74793bb6b88d1765c52d445831310b0d11ef423
Reviewed-on: https://go-review.googlesource.com/c/go/+/393394
Reviewed-by: Michael Pratt <[email protected]>
Run-TryBot: Michael Knyszek <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
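
As a rough illustration of the utilization bound described above, here is a
minimal standalone Go sketch (not part of the commit): expectedPeakUtilization
is a made-up name, and the 0.25 constant stands in for the runtime's ~25%
background GC utilization target.

package main

import "fmt"

// expectedPeakUtilization approximates the worst-case CPU utilization (as a
// fraction of GOMAXPROCS) during a periodic GC cycle after this change: the
// ~25% background worker target plus a single idle mark worker, capped at 100%.
// This mirrors min(25% + 1/GOMAXPROCS*100%, 100%) from the commit message and
// is an illustrative simplification, not runtime code.
func expectedPeakUtilization(gomaxprocs int) float64 {
	const gcBackgroundUtilization = 0.25
	u := gcBackgroundUtilization + 1.0/float64(gomaxprocs)
	if u > 1.0 {
		u = 1.0
	}
	return u
}

func main() {
	for _, procs := range []int{1, 2, 4, 8, 16} {
		// e.g. GOMAXPROCS=4 gives ~50%, GOMAXPROCS=16 gives ~31%.
		fmt.Printf("GOMAXPROCS=%-2d -> peak ~%.0f%%\n", procs, 100*expectedPeakUtilization(procs))
	}
}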
1 parent 226346b commit e1b5f34

5 files changed, +223 -17 lines


src/runtime/export_test.go

+17 -1

@@ -1271,7 +1271,7 @@ func (c *GCController) StartCycle(stackSize, globalsSize uint64, scannableFrac f
 	c.globalsScan = globalsSize
 	c.heapLive = c.trigger
 	c.heapScan += uint64(float64(c.trigger-c.heapMarked) * scannableFrac)
-	c.startCycle(0, gomaxprocs)
+	c.startCycle(0, gomaxprocs, gcTrigger{kind: gcTriggerHeap})
 }

 func (c *GCController) AssistWorkPerByte() float64 {
@@ -1318,6 +1318,22 @@ func (c *GCController) EndCycle(bytesMarked uint64, assistTime, elapsed int64, g
 	c.commit()
 }

+func (c *GCController) AddIdleMarkWorker() bool {
+	return c.addIdleMarkWorker()
+}
+
+func (c *GCController) NeedIdleMarkWorker() bool {
+	return c.needIdleMarkWorker()
+}
+
+func (c *GCController) RemoveIdleMarkWorker() {
+	c.removeIdleMarkWorker()
+}
+
+func (c *GCController) SetMaxIdleMarkWorkers(max int32) {
+	c.setMaxIdleMarkWorkers(max)
+}
+
 var escapeSink any

 //go:noinline

src/runtime/mgc.go

+3 -3

@@ -672,7 +672,7 @@ func gcStart(trigger gcTrigger) {

 	// Assists and workers can start the moment we start
 	// the world.
-	gcController.startCycle(now, int(gomaxprocs))
+	gcController.startCycle(now, int(gomaxprocs), trigger)
 	work.heapGoal = gcController.heapGoal

 	// In STW mode, disable scheduling of user Gs. This may also
@@ -1297,9 +1297,9 @@ func gcBgMarkWorker() {
 			casgstatus(gp, _Gwaiting, _Grunning)
 		})

-		// Account for time.
+		// Account for time and mark us as stopped.
 		duration := nanotime() - startTime
-		gcController.logWorkTime(pp.gcMarkWorkerMode, duration)
+		gcController.markWorkerStop(pp.gcMarkWorkerMode, duration)
 		if pp.gcMarkWorkerMode == gcMarkWorkerFractionalMode {
 			atomic.Xaddint64(&pp.gcFractionalMarkTime, duration)
 		}

src/runtime/mgcpacer.go

+127 -5

@@ -280,6 +280,35 @@ type gcControllerState struct {
 	// dedicated mark workers get started.
 	dedicatedMarkWorkersNeeded int64

+	// idleMarkWorkers is two packed int32 values in a single uint64.
+	// These two values are always updated simultaneously.
+	//
+	// The bottom int32 is the current number of idle mark workers executing.
+	//
+	// The top int32 is the maximum number of idle mark workers allowed to
+	// execute concurrently. Normally, this number is just gomaxprocs. However,
+	// during periodic GC cycles it is set to 1 because the system is idle
+	// anyway; there's no need to go full blast on all of GOMAXPROCS.
+	//
+	// The maximum number of idle mark workers is used to prevent new workers
+	// from starting, but it is not a hard maximum. It is possible (but
+	// exceedingly rare) for the current number of idle mark workers to
+	// transiently exceed the maximum. This could happen if the maximum changes
+	// just after a GC ends, and an M with no P.
+	//
+	// Note that the maximum may not be zero because idle-priority mark workers
+	// are vital to GC progress. Consider a situation in which goroutines
+	// block on the GC (such as via runtime.GOMAXPROCS) and only fractional
+	// mark workers are scheduled (e.g. GOMAXPROCS=1). Without idle-priority
+	// mark workers, the last running M might skip scheduling a fractional
+	// mark worker if its utilization goal is met, such that once it goes to
+	// sleep (because there's nothing to do), there will be nothing else to
+	// spin up a new M for the fractional worker in the future, stalling GC
+	// progress and causing a deadlock. However, idle-priority workers will
+	// *always* run when there is nothing left to do, ensuring the GC makes
+	// progress.
+	idleMarkWorkers atomic.Uint64
+
 	// assistWorkPerByte is the ratio of scan work to allocated
 	// bytes that should be performed by mutator assists. This is
 	// computed at the beginning of each cycle and updated every
@@ -342,7 +371,7 @@ func (c *gcControllerState) init(gcPercent int32) {
 // startCycle resets the GC controller's state and computes estimates
 // for a new GC cycle. The caller must hold worldsema and the world
 // must be stopped.
-func (c *gcControllerState) startCycle(markStartTime int64, procs int) {
+func (c *gcControllerState) startCycle(markStartTime int64, procs int, trigger gcTrigger) {
 	c.heapScanWork.Store(0)
 	c.stackScanWork.Store(0)
 	c.globalsScanWork.Store(0)
@@ -400,6 +429,18 @@ func (c *gcControllerState) startCycle(markStartTime int64, procs int) {
 		p.gcFractionalMarkTime = 0
 	}

+	if trigger.kind == gcTriggerTime {
+		// During a periodic GC cycle, avoid having more than
+		// one idle mark worker running at a time. We need to have
+		// at least one to ensure the GC makes progress, but more than
+		// one is unnecessary.
+		c.setMaxIdleMarkWorkers(1)
+	} else {
+		// N.B. gomaxprocs and dedicatedMarkWorkersNeeded is guaranteed not to
+		// change during a GC cycle.
+		c.setMaxIdleMarkWorkers(int32(procs) - int32(c.dedicatedMarkWorkersNeeded))
+	}
+
 	// Compute initial values for controls that are updated
 	// throughout the cycle.
 	c.revise()
@@ -781,11 +822,13 @@ func (c *gcControllerState) resetLive(bytesMarked uint64) {
 	}
 }

-// logWorkTime updates mark work accounting in the controller by a duration of
-// work in nanoseconds.
+// markWorkerStop must be called whenever a mark worker stops executing.
+//
+// It updates mark work accounting in the controller by a duration of
+// work in nanoseconds and other bookkeeping.
 //
 // Safe to execute at any time.
-func (c *gcControllerState) logWorkTime(mode gcMarkWorkerMode, duration int64) {
+func (c *gcControllerState) markWorkerStop(mode gcMarkWorkerMode, duration int64) {
 	switch mode {
 	case gcMarkWorkerDedicatedMode:
 		atomic.Xaddint64(&c.dedicatedMarkTime, duration)
@@ -794,8 +837,9 @@ func (c *gcControllerState) logWorkTime(mode gcMarkWorkerMode, duration int64) {
 		atomic.Xaddint64(&c.fractionalMarkTime, duration)
 	case gcMarkWorkerIdleMode:
 		atomic.Xaddint64(&c.idleMarkTime, duration)
+		c.removeIdleMarkWorker()
 	default:
-		throw("logWorkTime: unknown mark worker mode")
+		throw("markWorkerStop: unknown mark worker mode")
 	}
 }

@@ -1100,3 +1144,81 @@ func (c *piController) next(input, setpoint, period float64) (float64, bool) {
 func (c *piController) reset() {
 	c.errIntegral = 0
 }
+
+// addIdleMarkWorker attempts to add a new idle mark worker.
+//
+// If this returns true, the caller must become an idle mark worker unless
+// there's no background mark worker goroutines in the pool. This case is
+// harmless because there are already background mark workers running.
+// If this returns false, the caller must NOT become an idle mark worker.
+//
+// nosplit because it may be called without a P.
+//go:nosplit
+func (c *gcControllerState) addIdleMarkWorker() bool {
+	for {
+		old := c.idleMarkWorkers.Load()
+		n, max := int32(old&uint64(^uint32(0))), int32(old>>32)
+		if n >= max {
+			// See the comment on idleMarkWorkers for why
+			// n > max is tolerated.
+			return false
+		}
+		if n < 0 {
+			print("n=", n, " max=", max, "\n")
+			throw("negative idle mark workers")
+		}
+		new := uint64(uint32(n+1)) | (uint64(max) << 32)
+		if c.idleMarkWorkers.CompareAndSwap(old, new) {
+			return true
+		}
+	}
+}
+
+// needIdleMarkWorker is a hint as to whether another idle mark worker is needed.
+//
+// The caller must still call addIdleMarkWorker to become one. This is mainly
+// useful for a quick check before an expensive operation.
+//
+// nosplit because it may be called without a P.
+//go:nosplit
+func (c *gcControllerState) needIdleMarkWorker() bool {
+	p := c.idleMarkWorkers.Load()
+	n, max := int32(p&uint64(^uint32(0))), int32(p>>32)
+	return n < max
+}
+
+// removeIdleMarkWorker must be called when an new idle mark worker stops executing.
+func (c *gcControllerState) removeIdleMarkWorker() {
+	for {
+		old := c.idleMarkWorkers.Load()
+		n, max := int32(old&uint64(^uint32(0))), int32(old>>32)
+		if n-1 < 0 {
+			print("n=", n, " max=", max, "\n")
+			throw("negative idle mark workers")
+		}
+		new := uint64(uint32(n-1)) | (uint64(max) << 32)
+		if c.idleMarkWorkers.CompareAndSwap(old, new) {
+			return
+		}
+	}
+}
+
+// setMaxIdleMarkWorkers sets the maximum number of idle mark workers allowed.
+//
+// This method is optimistic in that it does not wait for the number of
+// idle mark workers to reduce to max before returning; it assumes the workers
+// will deschedule themselves.
+func (c *gcControllerState) setMaxIdleMarkWorkers(max int32) {
+	for {
+		old := c.idleMarkWorkers.Load()
+		n := int32(old & uint64(^uint32(0)))
+		if n < 0 {
+			print("n=", n, " max=", max, "\n")
+			throw("negative idle mark workers")
+		}
+		new := uint64(uint32(n)) | (uint64(max) << 32)
+		if c.idleMarkWorkers.CompareAndSwap(old, new) {
+			return
+		}
+	}
+}
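
To make the bit manipulation above easier to follow, here is a minimal
standalone sketch (illustrative only, not runtime code) of the same
two-int32s-in-a-uint64 encoding used by idleMarkWorkers; pack and unpack are
hypothetical helper names.

package main

import "fmt"

// pack stores the current worker count in the low 32 bits and the maximum in
// the high 32 bits, matching the encoding in gcControllerState.idleMarkWorkers.
func pack(n, max int32) uint64 {
	return uint64(uint32(n)) | (uint64(max) << 32)
}

// unpack reverses pack: old&uint64(^uint32(0)) masks off the low 32 bits (the
// count) and old>>32 recovers the high 32 bits (the maximum).
func unpack(v uint64) (n, max int32) {
	return int32(v & uint64(^uint32(0))), int32(v >> 32)
}

func main() {
	v := pack(3, 8)
	n, max := unpack(v)
	fmt.Printf("packed=%#x n=%d max=%d\n", v, n, max) // packed=0x800000003 n=3 max=8
}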

src/runtime/mgcpacer_test.go

+64

@@ -738,3 +738,67 @@ func FuzzPIController(f *testing.F) {
 		}
 	})
 }
+
+func TestIdleMarkWorkerCount(t *testing.T) {
+	const workers = 10
+	c := NewGCController(100)
+	c.SetMaxIdleMarkWorkers(workers)
+	for i := 0; i < workers; i++ {
+		if !c.NeedIdleMarkWorker() {
+			t.Fatalf("expected to need idle mark workers: i=%d", i)
+		}
+		if !c.AddIdleMarkWorker() {
+			t.Fatalf("expected to be able to add an idle mark worker: i=%d", i)
+		}
+	}
+	if c.NeedIdleMarkWorker() {
+		t.Fatalf("expected to not need idle mark workers")
+	}
+	if c.AddIdleMarkWorker() {
+		t.Fatalf("expected to not be able to add an idle mark worker")
+	}
+	for i := 0; i < workers; i++ {
+		c.RemoveIdleMarkWorker()
+		if !c.NeedIdleMarkWorker() {
+			t.Fatalf("expected to need idle mark workers after removal: i=%d", i)
+		}
+	}
+	for i := 0; i < workers-1; i++ {
+		if !c.AddIdleMarkWorker() {
+			t.Fatalf("expected to be able to add idle mark workers after adding again: i=%d", i)
+		}
+	}
+	for i := 0; i < 10; i++ {
+		if !c.AddIdleMarkWorker() {
+			t.Fatalf("expected to be able to add idle mark workers interleaved: i=%d", i)
+		}
+		if c.AddIdleMarkWorker() {
+			t.Fatalf("expected to not be able to add idle mark workers interleaved: i=%d", i)
+		}
+		c.RemoveIdleMarkWorker()
+	}
+	// Support the max being below the count.
+	c.SetMaxIdleMarkWorkers(0)
+	if c.NeedIdleMarkWorker() {
+		t.Fatalf("expected to not need idle mark workers after capacity set to 0")
+	}
+	if c.AddIdleMarkWorker() {
+		t.Fatalf("expected to not be able to add idle mark workers after capacity set to 0")
+	}
+	for i := 0; i < workers-1; i++ {
+		c.RemoveIdleMarkWorker()
+	}
+	if c.NeedIdleMarkWorker() {
+		t.Fatalf("expected to not need idle mark workers after capacity set to 0")
+	}
+	if c.AddIdleMarkWorker() {
+		t.Fatalf("expected to not be able to add idle mark workers after capacity set to 0")
+	}
+	c.SetMaxIdleMarkWorkers(1)
+	if !c.NeedIdleMarkWorker() {
+		t.Fatalf("expected to need idle mark workers after capacity set to 1")
+	}
+	if !c.AddIdleMarkWorker() {
+		t.Fatalf("expected to be able to add idle mark workers after capacity set to 1")
+	}
+}

src/runtime/proc.go

+12 -8

@@ -2629,9 +2629,8 @@ top:
 	// We have nothing to do.
 	//
 	// If we're in the GC mark phase, can safely scan and blacken objects,
-	// and have work to do, run idle-time marking rather than give up the
-	// P.
-	if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) {
+	// and have work to do, run idle-time marking rather than give up the P.
+	if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) && gcController.addIdleMarkWorker() {
 		node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
 		if node != nil {
 			_p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
@@ -2642,6 +2641,7 @@ top:
 			}
 			return gp, false
 		}
+		gcController.removeIdleMarkWorker()
 	}

 	// wasm only:
@@ -2959,8 +2959,12 @@ func checkTimersNoP(allpSnapshot []*p, timerpMaskSnapshot pMask, pollUntil int64
 // returned. The returned P has not been wired yet.
 func checkIdleGCNoP() (*p, *g) {
 	// N.B. Since we have no P, gcBlackenEnabled may change at any time; we
-	// must check again after acquiring a P.
-	if atomic.Load(&gcBlackenEnabled) == 0 {
+	// must check again after acquiring a P. As an optimization, we also check
+	// if an idle mark worker is needed at all. This is OK here, because if we
+	// observe that one isn't needed, at least one is currently running. Even if
+	// it stops running, its own journey into the scheduler should schedule it
+	// again, if need be (at which point, this check will pass, if relevant).
+	if atomic.Load(&gcBlackenEnabled) == 0 || !gcController.needIdleMarkWorker() {
 		return nil, nil
 	}
 	if !gcMarkWorkAvailable(nil) {
@@ -2991,9 +2995,8 @@ func checkIdleGCNoP() (*p, *g) {
 		return nil, nil
 	}

-	// Now that we own a P, gcBlackenEnabled can't change (as it requires
-	// STW).
-	if gcBlackenEnabled == 0 {
+	// Now that we own a P, gcBlackenEnabled can't change (as it requires STW).
+	if gcBlackenEnabled == 0 || !gcController.addIdleMarkWorker() {
 		pidleput(pp)
 		unlock(&sched.lock)
 		return nil, nil
@@ -3003,6 +3006,7 @@ func checkIdleGCNoP() (*p, *g) {
 	if node == nil {
 		pidleput(pp)
 		unlock(&sched.lock)
+		gcController.removeIdleMarkWorker()
 		return nil, nil
 	}

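The scheduler changes above follow a check/reserve/rollback pattern:
needIdleMarkWorker is a cheap hint, addIdleMarkWorker reserves a slot before a
P commits to idle marking, and removeIdleMarkWorker rolls the reservation back
if no background mark worker goroutine turns out to be available. Below is a
simplified, non-atomic model of that pattern; counter, tryStartIdleMarking, and
popWorker are all hypothetical names, not runtime code.

package main

import "fmt"

// counter is a stand-in for gcControllerState's idle mark worker accounting.
type counter struct{ n, max int32 }

func (c *counter) need() bool { return c.n < c.max }

func (c *counter) add() bool {
	if c.n >= c.max {
		return false
	}
	c.n++
	return true
}

func (c *counter) remove() { c.n-- }

// tryStartIdleMarking models the flow in findRunnable/checkIdleGCNoP: cheap
// hint first, then reserve a slot, then roll back if no worker is available.
func tryStartIdleMarking(c *counter, popWorker func() (string, bool)) (string, bool) {
	if !c.need() { // fast hint; avoids the expensive path when no slot is free
		return "", false
	}
	if !c.add() { // reserve a slot; may fail if another caller raced us to it
		return "", false
	}
	if w, ok := popWorker(); ok {
		return w, true // this caller now runs an idle mark worker
	}
	c.remove() // roll back the reservation; the pool had no worker goroutine
	return "", false
}

func main() {
	c := &counter{max: 1} // periodic GC cycles cap idle mark workers at 1
	pool := []string{"worker-1"}
	pop := func() (string, bool) {
		if len(pool) == 0 {
			return "", false
		}
		w := pool[0]
		pool = pool[1:]
		return w, true
	}
	fmt.Println(tryStartIdleMarking(c, pop)) // worker-1 true
	fmt.Println(tryStartIdleMarking(c, pop)) // "" false: the single idle slot is taken
}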