@@ -89,6 +89,25 @@ type mheap struct {
 	// TODO(austin): pagesInUse should be a uintptr, but the 386
 	// compiler can't 8-byte align fields.
 
+	// Page reclaimer state
+
+	// reclaimIndex is the page index in allArenas of next page to
+	// reclaim. Specifically, it refers to page (i %
+	// pagesPerArena) of arena allArenas[i / pagesPerArena].
+	//
+	// If this is >= 1<<63, the page reclaimer is done scanning
+	// the page marks.
+	//
+	// This is accessed atomically.
+	reclaimIndex uint64
+	// reclaimCredit is spare credit for extra pages swept. Since
+	// the page reclaimer works in large chunks, it may reclaim
+	// more than requested. Any spare pages released go to this
+	// credit pool.
+	//
+	// This is accessed atomically.
+	reclaimCredit uintptr
+
 	// Malloc stats.
 	largealloc  uint64 // bytes allocated for large objects
 	nlargealloc uint64 // number of large object allocations
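
(Illustration, not part of the change: the reclaimIndex comment above encodes a single flat page index across all arenas. A minimal standalone sketch of how such an index splits into an allArenas slot and a page within that arena; the pagesPerArena constant here is a stand-in for the runtime's own value, which is derived from heapArenaBytes and pageSize.)

package main

import "fmt"

// pagesPerArena is a stand-in value for this sketch; the runtime computes
// its own constant as heapArenaBytes / pageSize.
const pagesPerArena = 8192

// decode splits a flat reclaim index into an arena slot and a page within
// that arena, mirroring the comment on mheap.reclaimIndex.
func decode(i uint64) (arena, page uint64) {
	return i / pagesPerArena, i % pagesPerArena
}

func main() {
	arena, page := decode(10000)
	fmt.Println(arena, page) // 1 1808: page 1808 of arena allArenas[1]
}
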
@@ -142,6 +161,11 @@ type mheap struct {
 	// then release mheap_.lock.
 	allArenas []arenaIdx
 
+	// sweepArenas is a snapshot of allArenas taken at the
+	// beginning of the sweep cycle. This can be read safely by
+	// simply blocking GC (by disabling preemption).
+	sweepArenas []arenaIdx
+
 	_ uint32 // ensure 64-bit alignment of central
 
 	// central free lists for small size classes.
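
(Illustration, not part of the change: sweepArenas is a slice-header snapshot of an append-only slice. A rough user-level analogue of that pattern, using a mutex where the runtime instead relies on holding mheap_.lock for the copy and on disabled preemption for the later reads.)

package main

import (
	"fmt"
	"sync"
)

// registry sketches the snapshot idea: the live slice keeps growing under a
// lock, while a cycle works from a slice-header copy taken at its start.
type registry struct {
	mu   sync.Mutex
	live []int // stands in for mheap.allArenas (append-only)
	snap []int // stands in for mheap.sweepArenas
}

func (r *registry) add(v int) {
	r.mu.Lock()
	r.live = append(r.live, v)
	r.mu.Unlock()
}

func (r *registry) startCycle() {
	r.mu.Lock()
	r.snap = r.live // copy the slice header, not the elements
	r.mu.Unlock()
}

func main() {
	r := &registry{}
	r.add(1)
	r.add(2)
	r.startCycle()
	r.add(3) // grows live; the snapshot still sees only the first two entries
	fmt.Println(len(r.snap), len(r.live)) // 2 3
}
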
@@ -658,61 +682,158 @@ func (h *mheap) init() {
 	}
 }
 
-// Sweeps spans in list until reclaims at least npages into heap.
-// Returns the actual number of pages reclaimed.
-func (h *mheap) reclaimList(list *mSpanList, npages uintptr) uintptr {
-	n := uintptr(0)
-	sg := mheap_.sweepgen
-retry:
-	for s := list.first; s != nil; s = s.next {
-		if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
-			list.remove(s)
-			// swept spans are at the end of the list
-			list.insertBack(s) // Puts it back on a busy list. s is not in the treap at this point.
-			unlock(&h.lock)
-			snpages := s.npages
-			if s.sweep(false) {
-				n += snpages
+// reclaim sweeps and reclaims at least npage pages into the heap.
+// It is called before allocating npage pages to keep growth in check.
+//
+// reclaim implements the page-reclaimer half of the sweeper.
+//
+// h must NOT be locked.
+func (h *mheap) reclaim(npage uintptr) {
+	// This scans pagesPerChunk at a time. Higher values reduce
+	// contention on h.reclaimPos, but increase the minimum
+	// latency of performing a reclaim.
+	//
+	// Must be a multiple of the pageInUse bitmap element size.
+	//
+	// The time required by this can vary a lot depending on how
+	// many spans are actually freed. Experimentally, it can scan
+	// for pages at ~300 GB/ms on a 2.6GHz Core i7, but can only
+	// free spans at ~32 MB/ms. Using 512 pages bounds this at
+	// roughly 100µs.
+	//
+	// TODO(austin): Half of the time spent freeing spans is in
+	// locking/unlocking the heap (even with low contention). We
+	// could make the slow path here several times faster by
+	// batching heap frees.
+	const pagesPerChunk = 512
+
+	// Bail early if there's no more reclaim work.
+	if atomic.Load64(&h.reclaimIndex) >= 1<<63 {
+		return
+	}
+
+	// Disable preemption so the GC can't start while we're
+	// sweeping, so we can read h.sweepArenas, and so
+	// traceGCSweepStart/Done pair on the P.
+	mp := acquirem()
+
+	if trace.enabled {
+		traceGCSweepStart()
+	}
+
+	arenas := h.sweepArenas
+	locked := false
+	for npage > 0 {
+		// Pull from accumulated credit first.
+		if credit := atomic.Loaduintptr(&h.reclaimCredit); credit > 0 {
+			take := credit
+			if take > npage {
+				// Take only what we need.
+				take = npage
 			}
-			lock(&h.lock)
-			if n >= npages {
-				return n
+			if atomic.Casuintptr(&h.reclaimCredit, credit, credit-take) {
+				npage -= take
 			}
-			// the span could have been moved elsewhere
-			goto retry
-		}
-		if s.sweepgen == sg-1 {
-			// the span is being swept by background sweeper, skip
 			continue
 		}
-		// already swept empty span,
-		// all subsequent ones must also be either swept or in process of sweeping
-		break
+
+		// Claim a chunk of work.
+		idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerChunk) - pagesPerChunk)
+		if idx/pagesPerArena >= uintptr(len(arenas)) {
+			// Page reclaiming is done.
+			atomic.Store64(&h.reclaimIndex, 1<<63)
+			break
+		}
+
+		if !locked {
+			// Lock the heap for reclaimChunk.
+			lock(&h.lock)
+			locked = true
+		}
+
+		// Scan this chunk.
+		nfound := h.reclaimChunk(arenas, idx, pagesPerChunk)
+		if nfound <= npage {
+			npage -= nfound
+		} else {
+			// Put spare pages toward global credit.
+			atomic.Xadduintptr(&h.reclaimCredit, nfound-npage)
+			npage = 0
+		}
+	}
+	if locked {
+		unlock(&h.lock)
 	}
-	return n
-}
 
-// Sweeps and reclaims at least npage pages into heap.
-// Called before allocating npage pages.
-func (h *mheap) reclaim(npage uintptr) {
-	if h.reclaimList(&h.busy, npage) != 0 {
-		return // Bingo!
+	if trace.enabled {
+		traceGCSweepDone()
 	}
+	releasem(mp)
+}
 
-	// Now sweep everything that is not yet swept.
-	var reclaimed uintptr
-	unlock(&h.lock)
-	for {
-		n := sweepone()
-		if n == ^uintptr(0) { // all spans are swept
-			break
+// reclaimChunk sweeps unmarked spans that start at page indexes [pageIdx, pageIdx+n).
+// It returns the number of pages returned to the heap.
+//
+// h.lock must be held and the caller must be non-preemptible.
+func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
+	// The heap lock must be held because this accesses the
+	// heapArena.spans arrays using potentially non-live pointers.
+	// In particular, if a span were freed and merged concurrently
+	// with this probing heapArena.spans, it would be possible to
+	// observe arbitrary, stale span pointers.
+	n0 := n
+	var nFreed uintptr
+	sg := h.sweepgen
+	for n > 0 {
+		ai := arenas[pageIdx/pagesPerArena]
+		ha := h.arenas[ai.l1()][ai.l2()]
+
+		// Get a chunk of the bitmap to work on.
+		arenaPage := uint(pageIdx % pagesPerArena)
+		inUse := ha.pageInUse[arenaPage/8:]
+		marked := ha.pageMarks[arenaPage/8:]
+		if uintptr(len(inUse)) > n/8 {
+			inUse = inUse[:n/8]
+			marked = marked[:n/8]
 		}
-		reclaimed += n
-		if reclaimed >= npage {
-			break
+
+		// Scan this bitmap chunk for spans that are in-use
+		// but have no marked objects on them.
+		for i := range inUse {
+			inUseUnmarked := inUse[i] &^ marked[i]
+			if inUseUnmarked == 0 {
+				continue
+			}
+
+			for j := uint(0); j < 8; j++ {
+				if inUseUnmarked&(1<<j) != 0 {
+					s := ha.spans[arenaPage+uint(i)*8+j]
+					if atomic.Load(&s.sweepgen) == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
+						npages := s.npages
+						unlock(&h.lock)
+						if s.sweep(false) {
+							nFreed += npages
+						}
+						lock(&h.lock)
+						// Reload inUse. It's possible nearby
+						// spans were freed when we dropped the
+						// lock and we don't want to get stale
+						// pointers from the spans array.
+						inUseUnmarked = inUse[i] &^ marked[i]
+					}
+				}
+			}
 		}
+
+		// Advance.
+		pageIdx += uintptr(len(inUse) * 8)
+		n -= uintptr(len(inUse) * 8)
 	}
-	lock(&h.lock)
+	if trace.enabled {
+		// Account for pages scanned but not reclaimed.
+		traceGCSweepSpan((n0 - nFreed) * pageSize)
+	}
+	return nFreed
 }
 
 // alloc_m is the internal implementation of mheap.alloc.
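
(Illustration, not part of the change: reclaimChunk above finds candidate spans by computing inUse[i] &^ marked[i] over the per-arena page bitmaps and then walking the set bits. A minimal standalone sketch of that bit-scanning pattern, with plain byte slices standing in for heapArena.pageInUse and heapArena.pageMarks.)

package main

import "fmt"

func main() {
	// Hypothetical per-arena bitmaps: bit j of byte i describes page i*8+j.
	inUse := []uint8{0b10110001, 0b00000000, 0b11111111}
	marked := []uint8{0b10000001, 0b00000000, 0b11110000}

	// A page that is in use but has no marked objects is a reclaim candidate.
	for i := range inUse {
		inUseUnmarked := inUse[i] &^ marked[i]
		if inUseUnmarked == 0 {
			continue // nothing to reclaim in this byte
		}
		for j := uint(0); j < 8; j++ {
			if inUseUnmarked&(1<<j) != 0 {
				fmt.Printf("page %d: in use, unmarked\n", i*8+int(j))
			}
		}
	}
	// Prints pages 4, 5, 16, 17, 18, 19.
}
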
@@ -723,27 +844,14 @@ func (h *mheap) reclaim(npage uintptr) {
 //go:systemstack
 func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan {
 	_g_ := getg()
-	lock(&h.lock)
 
 	// To prevent excessive heap growth, before allocating n pages
 	// we need to sweep and reclaim at least n pages.
 	if h.sweepdone == 0 {
-		// TODO(austin): This tends to sweep a large number of
-		// spans in order to find a few completely free spans
-		// (for example, in the garbage benchmark, this sweeps
-		// ~30x the number of pages it's trying to allocate).
-		// If GC kept a bit for whether there were any marks
-		// in a span, we could release these free spans
-		// at the end of GC and eliminate this entirely.
-		if trace.enabled {
-			traceGCSweepStart()
-		}
 		h.reclaim(npage)
-		if trace.enabled {
-			traceGCSweepDone()
-		}
 	}
 
+	lock(&h.lock)
 	// transfer stats from cache to global
 	memstats.heap_scan += uint64(_g_.m.mcache.local_scan)
 	_g_.m.mcache.local_scan = 0
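
(Illustration, not part of the change: the reclaimCredit fast path at the top of reclaim in the earlier hunk is a load/compare-and-swap loop over a shared counter. A standalone sketch of that pattern using sync/atomic; the runtime itself uses its internal runtime/internal/atomic package and a uintptr counter.)

package main

import (
	"fmt"
	"sync/atomic"
)

// takeCredit consumes up to want pages from a shared credit pool and reports
// how many it took, mirroring the credit-taking loop in mheap.reclaim.
func takeCredit(credit *uint64, want uint64) uint64 {
	for {
		c := atomic.LoadUint64(credit)
		if c == 0 {
			return 0
		}
		take := c
		if take > want {
			take = want // take only what we need
		}
		if atomic.CompareAndSwapUint64(credit, c, c-take) {
			return take
		}
		// CAS failed: another reclaimer raced us; reload and retry.
	}
}

func main() {
	var credit uint64 = 100
	got := takeCredit(&credit, 30)
	fmt.Println(got, atomic.LoadUint64(&credit)) // 30 70
}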