
Commit 6bd85f7

runtime: implement efficient page reclaimer
When we attempt to allocate an N page span (either for a large allocation or when an mcentral runs dry), we first try to sweep spans to release N pages. Currently, this can be extremely expensive: sweeping a span to emptiness is the hardest thing to ask for and the sweeper generally doesn't know where to even look for potentially fruitful results. Since this is on the critical path of many allocations, this is unfortunate.

This CL changes how we reclaim empty spans. Instead of trying lots of spans and hoping for the best, it uses the newly introduced span marks to efficiently find empty spans. The span marks (and in-use bits) are in a dense bitmap, so these spans can be found with an efficient sequential memory scan. This approach can scan for unmarked spans at about 300 GB/ms and can free unmarked spans at about 32 MB/ms. We could probably significantly improve the rate at which it can free unmarked spans, but that's a separate issue.

Like the current reclaimer, this is still linear in the number of spans that are swept, but the constant factor is now so vanishingly small that it doesn't matter.

The benchmark in #18155 demonstrates both significant page reclaiming delays and object reclaiming delays. With "-retain-count=20000000 -preallocate=true -loop-count=3", the benchmark demonstrates several page reclaiming delays on the order of 40ms. After this change, the page reclaims are insignificant. The longest sweeps are still ~150ms, but those are object reclaiming delays. We'll address those in the next several CLs.

Updates #18155.

Fixes #21378 by completely replacing the logic that had that bug.

Change-Id: Iad80eec11d7fc262d02c8f0761ac6998425c4064
Reviewed-on: https://go-review.googlesource.com/c/138959
Run-TryBot: Austin Clements <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Rick Hudson <[email protected]>
1 parent 6d19461 commit 6bd85f7
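
The key idea in the message above is that the in-use and mark bits live in dense, parallel bitmaps, so one word-sized AND-NOT tests many pages at once. Below is a minimal standalone sketch of that scan, not taken from the commit; the bitmap layout and the findEmptyPages helper are illustrative only.

package main

import (
	"fmt"
	"math/bits"
)

// findEmptyPages reports the indexes of pages whose in-use bit is set but
// whose mark bit is clear: in-use spans with no marked objects, i.e. the
// candidates the page reclaimer can free. One AND-NOT per word tests 64
// pages at a time, which is why the scan can run near memory bandwidth.
func findEmptyPages(inUse, marked []uint64) []int {
	var empty []int
	for i := range inUse {
		unmarked := inUse[i] &^ marked[i] // in-use pages with no marks
		for unmarked != 0 {
			j := bits.TrailingZeros64(unmarked) // lowest candidate page
			empty = append(empty, i*64+j)
			unmarked &= unmarked - 1 // clear lowest set bit
		}
	}
	return empty
}

func main() {
	inUse := []uint64{0b11110110}  // pages 1,2,4,5,6,7 hold spans
	marked := []uint64{0b01010010} // pages 1,4,6 have marked objects
	fmt.Println(findEmptyPages(inUse, marked)) // [2 5 7]
}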

File tree

3 files changed: +194 -60 lines changed

src/runtime/mgc.go (+3)

@@ -1974,6 +1974,9 @@ func gcSweep(mode gcMode) {
 		throw("non-empty swept list")
 	}
 	mheap_.pagesSwept = 0
+	mheap_.sweepArenas = mheap_.allArenas
+	mheap_.reclaimIndex = 0
+	mheap_.reclaimCredit = 0
 	unlock(&mheap_.lock)
 
 	if !_ConcurrentSweep || mode == gcForceBlockMode {

src/runtime/mgcsweep.go (+25 -2)

@@ -4,6 +4,24 @@
 
 // Garbage collector: sweeping
 
+// The sweeper consists of two different algorithms:
+//
+// * The object reclaimer finds and frees unmarked slots in spans. It
+//   can free a whole span if none of the objects are marked, but that
+//   isn't its goal. This can be driven either synchronously by
+//   mcentral.cacheSpan for mcentral spans, or asynchronously by
+//   sweepone from the list of all in-use spans in mheap_.sweepSpans.
+//
+// * The span reclaimer looks for spans that contain no marked objects
+//   and frees whole spans. This is a separate algorithm because
+//   freeing whole spans is the hardest task for the object reclaimer,
+//   but is critical when allocating new spans. The entry point for
+//   this is mheap_.reclaim and it's driven by a sequential scan of
+//   the page marks bitmap in the heap arenas.
+//
+// Both algorithms ultimately call mspan.sweep, which sweeps a single
+// heap span.
+
 package runtime
 
 import (
@@ -72,7 +90,7 @@ func bgsweep(c chan int) {
 	}
 }
 
-// sweepone sweeps one span and returns the number of pages returned
+// sweepone sweeps some unswept heap span and returns the number of pages returned
 // to the heap, or ^uintptr(0) if there was nothing to sweep.
 func sweepone() uintptr {
 	_g_ := getg()
@@ -115,7 +133,12 @@ func sweepone() uintptr {
 	npages := ^uintptr(0)
 	if s != nil {
 		npages = s.npages
-		if !s.sweep(false) {
+		if s.sweep(false) {
+			// Whole span was freed. Count it toward the
+			// page reclaimer credit since these pages can
+			// now be used for span allocation.
+			atomic.Xadduintptr(&mheap_.reclaimCredit, npages)
+		} else {
 			// Span is still in-use, so this returned no
 			// pages to the heap and the span needs to
 			// move to the swept in-use list.
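
Both reclaimers coordinate through each span's sweepgen field, which the runtime documents as: sweepgen == h.sweepgen-2 means the span needs sweeping, sg-1 means it is being swept, and sg means it is swept. A toy sketch of that claim step follows (using sync/atomic in place of the runtime's internal atomics; tryClaimSpan is a hypothetical name):

package main

import (
	"fmt"
	"sync/atomic"
)

// tryClaimSpan attempts to move a span from "needs sweeping" (sg-2) to
// "being swept" (sg-1). The CAS guarantees that exactly one sweeper claims
// each span, so the object and span reclaimers can run concurrently
// without double-sweeping anything.
func tryClaimSpan(spanGen *uint32, sg uint32) bool {
	return atomic.LoadUint32(spanGen) == sg-2 &&
		atomic.CompareAndSwapUint32(spanGen, sg-2, sg-1)
}

func main() {
	const sg uint32 = 6
	gen := sg - 2 // an unswept span
	fmt.Println(tryClaimSpan(&gen, sg)) // true: this sweeper claimed it
	fmt.Println(tryClaimSpan(&gen, sg)) // false: already being swept
}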

src/runtime/mheap.go (+166 -58)

@@ -89,6 +89,25 @@ type mheap struct {
 	// TODO(austin): pagesInUse should be a uintptr, but the 386
 	// compiler can't 8-byte align fields.
 
+	// Page reclaimer state
+
+	// reclaimIndex is the page index in allArenas of next page to
+	// reclaim. Specifically, it refers to page (i %
+	// pagesPerArena) of arena allArenas[i / pagesPerArena].
+	//
+	// If this is >= 1<<63, the page reclaimer is done scanning
+	// the page marks.
+	//
+	// This is accessed atomically.
+	reclaimIndex uint64
+	// reclaimCredit is spare credit for extra pages swept. Since
+	// the page reclaimer works in large chunks, it may reclaim
+	// more than requested. Any spare pages released go to this
+	// credit pool.
+	//
+	// This is accessed atomically.
+	reclaimCredit uintptr
+
 	// Malloc stats.
 	largealloc  uint64 // bytes allocated for large objects
 	nlargealloc uint64 // number of large object allocations
@@ -142,6 +161,11 @@ type mheap struct {
 	// then release mheap_.lock.
 	allArenas []arenaIdx
 
+	// sweepArenas is a snapshot of allArenas taken at the
+	// beginning of the sweep cycle. This can be read safely by
+	// simply blocking GC (by disabling preemption).
+	sweepArenas []arenaIdx
+
 	_ uint32 // ensure 64-bit alignment of central
 
 	// central free lists for small size classes.
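
The reclaimCredit field added above acts as a lock-free credit pool: sweepers that free more pages than they needed deposit the surplus with an atomic add, and later callers try to withdraw from the pool before doing any bitmap scanning. A standalone sketch of that deposit/withdraw pattern (sync/atomic stands in for the runtime's internal atomics; takeCredit is a hypothetical name):

package main

import (
	"fmt"
	"sync/atomic"
)

// takeCredit withdraws up to want pages from the pool and returns how many
// it actually took. On a CAS race it returns 0; the caller simply retries
// or falls back to scanning, mirroring the best-effort loop in reclaim.
func takeCredit(pool *uint64, want uint64) uint64 {
	credit := atomic.LoadUint64(pool)
	if credit == 0 {
		return 0
	}
	take := credit
	if take > want {
		take = want // take only what we need
	}
	if atomic.CompareAndSwapUint64(pool, credit, credit-take) {
		return take
	}
	return 0 // lost a race with another sweeper
}

func main() {
	var pool uint64
	atomic.AddUint64(&pool, 28)            // a sweeper deposits surplus pages
	fmt.Println(takeCredit(&pool, 8))      // 8
	fmt.Println(atomic.LoadUint64(&pool))  // 20 remaining
}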
@@ -658,61 +682,158 @@ func (h *mheap) init() {
 	}
 }
 
-// Sweeps spans in list until reclaims at least npages into heap.
-// Returns the actual number of pages reclaimed.
-func (h *mheap) reclaimList(list *mSpanList, npages uintptr) uintptr {
-	n := uintptr(0)
-	sg := mheap_.sweepgen
-retry:
-	for s := list.first; s != nil; s = s.next {
-		if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
-			list.remove(s)
-			// swept spans are at the end of the list
-			list.insertBack(s) // Puts it back on a busy list. s is not in the treap at this point.
-			unlock(&h.lock)
-			snpages := s.npages
-			if s.sweep(false) {
-				n += snpages
+// reclaim sweeps and reclaims at least npage pages into the heap.
+// It is called before allocating npage pages to keep growth in check.
+//
+// reclaim implements the page-reclaimer half of the sweeper.
+//
+// h must NOT be locked.
+func (h *mheap) reclaim(npage uintptr) {
+	// This scans pagesPerChunk at a time. Higher values reduce
+	// contention on h.reclaimIndex, but increase the minimum
+	// latency of performing a reclaim.
+	//
+	// Must be a multiple of the pageInUse bitmap element size.
+	//
+	// The time required by this can vary a lot depending on how
+	// many spans are actually freed. Experimentally, it can scan
+	// for pages at ~300 GB/ms on a 2.6GHz Core i7, but can only
+	// free spans at ~32 MB/ms. Using 512 pages bounds this at
+	// roughly 100µs.
+	//
+	// TODO(austin): Half of the time spent freeing spans is in
+	// locking/unlocking the heap (even with low contention). We
+	// could make the slow path here several times faster by
+	// batching heap frees.
+	const pagesPerChunk = 512
+
+	// Bail early if there's no more reclaim work.
+	if atomic.Load64(&h.reclaimIndex) >= 1<<63 {
+		return
+	}
+
+	// Disable preemption so the GC can't start while we're
+	// sweeping, so we can read h.sweepArenas, and so
+	// traceGCSweepStart/Done pair on the P.
+	mp := acquirem()
+
+	if trace.enabled {
+		traceGCSweepStart()
+	}
+
+	arenas := h.sweepArenas
+	locked := false
+	for npage > 0 {
+		// Pull from accumulated credit first.
+		if credit := atomic.Loaduintptr(&h.reclaimCredit); credit > 0 {
+			take := credit
+			if take > npage {
+				// Take only what we need.
+				take = npage
 			}
-			lock(&h.lock)
-			if n >= npages {
-				return n
+			if atomic.Casuintptr(&h.reclaimCredit, credit, credit-take) {
+				npage -= take
 			}
-			// the span could have been moved elsewhere
-			goto retry
-		}
-		if s.sweepgen == sg-1 {
-			// the span is being swept by background sweeper, skip
 			continue
 		}
-		// already swept empty span,
-		// all subsequent ones must also be either swept or in process of sweeping
-		break
+
+		// Claim a chunk of work.
+		idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerChunk) - pagesPerChunk)
+		if idx/pagesPerArena >= uintptr(len(arenas)) {
+			// Page reclaiming is done.
+			atomic.Store64(&h.reclaimIndex, 1<<63)
+			break
+		}
+
+		if !locked {
+			// Lock the heap for reclaimChunk.
+			lock(&h.lock)
+			locked = true
+		}
+
+		// Scan this chunk.
+		nfound := h.reclaimChunk(arenas, idx, pagesPerChunk)
+		if nfound <= npage {
+			npage -= nfound
+		} else {
+			// Put spare pages toward global credit.
+			atomic.Xadduintptr(&h.reclaimCredit, nfound-npage)
+			npage = 0
+		}
+	}
+	if locked {
+		unlock(&h.lock)
 	}
-	return n
-}
 
-// Sweeps and reclaims at least npage pages into heap.
-// Called before allocating npage pages.
-func (h *mheap) reclaim(npage uintptr) {
-	if h.reclaimList(&h.busy, npage) != 0 {
-		return // Bingo!
+	if trace.enabled {
+		traceGCSweepDone()
 	}
+	releasem(mp)
+}
 
-	// Now sweep everything that is not yet swept.
-	var reclaimed uintptr
-	unlock(&h.lock)
-	for {
-		n := sweepone()
-		if n == ^uintptr(0) { // all spans are swept
-			break
+// reclaimChunk sweeps unmarked spans that start at page indexes [pageIdx, pageIdx+n).
+// It returns the number of pages returned to the heap.
+//
+// h.lock must be held and the caller must be non-preemptible.
+func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
+	// The heap lock must be held because this accesses the
+	// heapArena.spans arrays using potentially non-live pointers.
+	// In particular, if a span were freed and merged concurrently
+	// with this probing heapArena.spans, it would be possible to
+	// observe arbitrary, stale span pointers.
+	n0 := n
+	var nFreed uintptr
+	sg := h.sweepgen
+	for n > 0 {
+		ai := arenas[pageIdx/pagesPerArena]
+		ha := h.arenas[ai.l1()][ai.l2()]
+
+		// Get a chunk of the bitmap to work on.
+		arenaPage := uint(pageIdx % pagesPerArena)
+		inUse := ha.pageInUse[arenaPage/8:]
+		marked := ha.pageMarks[arenaPage/8:]
+		if uintptr(len(inUse)) > n/8 {
+			inUse = inUse[:n/8]
+			marked = marked[:n/8]
 		}
-		reclaimed += n
-		if reclaimed >= npage {
-			break
+
+		// Scan this bitmap chunk for spans that are in-use
+		// but have no marked objects on them.
+		for i := range inUse {
+			inUseUnmarked := inUse[i] &^ marked[i]
+			if inUseUnmarked == 0 {
+				continue
+			}
+
+			for j := uint(0); j < 8; j++ {
+				if inUseUnmarked&(1<<j) != 0 {
+					s := ha.spans[arenaPage+uint(i)*8+j]
+					if atomic.Load(&s.sweepgen) == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
+						npages := s.npages
+						unlock(&h.lock)
+						if s.sweep(false) {
+							nFreed += npages
+						}
+						lock(&h.lock)
+						// Reload inUse. It's possible nearby
+						// spans were freed when we dropped the
+						// lock and we don't want to get stale
+						// pointers from the spans array.
+						inUseUnmarked = inUse[i] &^ marked[i]
+					}
+				}
+			}
 		}
+
+		// Advance.
+		pageIdx += uintptr(len(inUse) * 8)
+		n -= uintptr(len(inUse) * 8)
 	}
-	lock(&h.lock)
+	if trace.enabled {
+		// Account for pages scanned but not reclaimed.
+		traceGCSweepSpan((n0 - nFreed) * pageSize)
+	}
+	return nFreed
 }
 
 // alloc_m is the internal implementation of mheap.alloc.
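
A second pattern worth pulling out of reclaim above is how work is partitioned: reclaimIndex is a global cursor, and each caller claims an exclusive half-open range of page indexes with a single atomic add, no lock required. A standalone sketch of that chunk-claiming idea (toy names, not the runtime's code):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// claimChunk advances a shared cursor by chunkSize with one atomic add; the
// caller then owns exactly [start, start+chunkSize). Concurrent claimers
// never overlap, so work is partitioned without any locking.
func claimChunk(cursor *uint64, chunkSize uint64) (start uint64) {
	return atomic.AddUint64(cursor, chunkSize) - chunkSize
}

func main() {
	var cursor uint64
	const chunk = 512
	var wg sync.WaitGroup
	for w := 0; w < 4; w++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			start := claimChunk(&cursor, chunk)
			fmt.Printf("worker %d owns pages [%d, %d)\n", id, start, start+chunk)
		}(w)
	}
	wg.Wait()
}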
@@ -723,27 +844,14 @@ func (h *mheap) reclaim(npage uintptr) {
 //go:systemstack
 func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan {
 	_g_ := getg()
-	lock(&h.lock)
 
 	// To prevent excessive heap growth, before allocating n pages
 	// we need to sweep and reclaim at least n pages.
 	if h.sweepdone == 0 {
-		// TODO(austin): This tends to sweep a large number of
-		// spans in order to find a few completely free spans
-		// (for example, in the garbage benchmark, this sweeps
-		// ~30x the number of pages it's trying to allocate).
-		// If GC kept a bit for whether there were any marks
-		// in a span, we could release these free spans
-		// at the end of GC and eliminate this entirely.
-		if trace.enabled {
-			traceGCSweepStart()
-		}
 		h.reclaim(npage)
-		if trace.enabled {
-			traceGCSweepDone()
-		}
 	}
 
+	lock(&h.lock)
 	// transfer stats from cache to global
 	memstats.heap_scan += uint64(_g_.m.mcache.local_scan)
 	_g_.m.mcache.local_scan = 0
