Skip to content

Commit a475a38

Browse files
committed
runtime: parallelize STW mcache flushing
Currently all mcaches are flushed in a single STW root job. This takes about 5 µs per P, but since it's done sequentially it adds about 5*GOMAXPROCS µs to the STW.

Fix this by parallelizing the job. Since there are exactly GOMAXPROCS mcaches to flush, this parallelizes quite nicely and brings the STW latency cost down to a constant 5 µs (assuming GOMAXPROCS actually reflects the number of CPUs).

Updates #17503.

Change-Id: Ibefeb1c2229975d5137c6e67fac3b6c92103742d
Reviewed-on: https://go-review.googlesource.com/32033
Reviewed-by: Rick Hudson <[email protected]>
1 parent 20edeab commit a475a38

File tree

3 files changed

+37
-19
lines changed

3 files changed

+37
-19
lines changed

src/runtime/mgc.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -787,6 +787,7 @@ var work struct {
787787
alldone note
788788

789789
// Number of roots of various root types. Set by gcMarkRootPrepare.
790+
nFlushCacheRoots int
790791
nDataRoots, nBSSRoots, nSpanRoots, nStackRoots, nRescanRoots int
791792

792793
// markrootDone indicates that roots have been marked at least

src/runtime/mgcmark.go

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@ import (
1414

1515
const (
1616
fixedRootFinalizers = iota
17-
fixedRootFlushCaches
1817
fixedRootFreeGStacks
1918
fixedRootCount
2019

@@ -45,6 +44,12 @@ const (
4544
//
4645
//go:nowritebarrier
4746
func gcMarkRootPrepare() {
47+
if gcphase == _GCmarktermination {
48+
work.nFlushCacheRoots = int(gomaxprocs)
49+
} else {
50+
work.nFlushCacheRoots = 0
51+
}
52+
4853
// Compute how many data and BSS root blocks there are.
4954
nBlocks := func(bytes uintptr) int {
5055
return int((bytes + rootBlockBytes - 1) / rootBlockBytes)
@@ -108,7 +113,7 @@ func gcMarkRootPrepare() {
108113
}
109114

110115
work.markrootNext = 0
111-
work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots + work.nRescanRoots)
116+
work.markrootJobs = uint32(fixedRootCount + work.nFlushCacheRoots + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots + work.nRescanRoots)
112117
}
113118

114119
// gcMarkRootCheck checks that all roots have been scanned. It is
@@ -156,7 +161,8 @@ var oneptrmask = [...]uint8{1}
156161
func markroot(gcw *gcWork, i uint32) {
157162
// TODO(austin): This is a bit ridiculous. Compute and store
158163
// the bases in gcMarkRootPrepare instead of the counts.
159-
baseData := uint32(fixedRootCount)
164+
baseFlushCache := uint32(fixedRootCount)
165+
baseData := baseFlushCache + uint32(work.nFlushCacheRoots)
160166
baseBSS := baseData + uint32(work.nDataRoots)
161167
baseSpans := baseBSS + uint32(work.nBSSRoots)
162168
baseStacks := baseSpans + uint32(work.nSpanRoots)
@@ -165,6 +171,9 @@ func markroot(gcw *gcWork, i uint32) {
165171

166172
// Note: if you add a case here, please also update heapdump.go:dumproots.
167173
switch {
174+
case baseFlushCache <= i && i < baseData:
175+
flushmcache(int(i - baseFlushCache))
176+
168177
case baseData <= i && i < baseBSS:
169178
for datap := &firstmoduledata; datap != nil; datap = datap.next {
170179
markrootBlock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, gcw, int(i-baseData))
@@ -180,11 +189,6 @@ func markroot(gcw *gcWork, i uint32) {
180189
scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw)
181190
}
182191

183-
case i == fixedRootFlushCaches:
184-
if gcphase == _GCmarktermination { // Do not flush mcaches during concurrent phase.
185-
flushallmcaches()
186-
}
187-
188192
case i == fixedRootFreeGStacks:
189193
// Only do this once per GC cycle; preferably
190194
// concurrently.

src/runtime/mstats.go

Lines changed: 24 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -576,19 +576,32 @@ func cachestats() {
576576
}
577577
}
578578

579+
// flushmcache flushes the mcache of allp[i].
580+
//
581+
// The world must be stopped.
582+
//
583+
//go:nowritebarrier
584+
func flushmcache(i int) {
585+
p := allp[i]
586+
if p == nil {
587+
return
588+
}
589+
c := p.mcache
590+
if c == nil {
591+
return
592+
}
593+
c.releaseAll()
594+
stackcache_clear(c)
595+
}
596+
597+
// flushallmcaches flushes the mcaches of all Ps.
598+
//
599+
// The world must be stopped.
600+
//
579601
//go:nowritebarrier
580602
func flushallmcaches() {
581-
for i := 0; ; i++ {
582-
p := allp[i]
583-
if p == nil {
584-
break
585-
}
586-
c := p.mcache
587-
if c == nil {
588-
continue
589-
}
590-
c.releaseAll()
591-
stackcache_clear(c)
603+
for i := 0; i < int(gomaxprocs); i++ {
604+
flushmcache(i)
592605
}
593606
}
594607

0 commit comments

Comments
 (0)