
Commit ff70494

runtime: split spans when scavenging if it's more than we need
This change makes it so that during scavenging we split spans when the span we have next for scavenging is larger than the amount of work we have left to do. The purpose of this change is to improve the worst-case behavior of the scavenger: currently, if the scavenger only has a little bit of work to do but sees a very large free span, it will scavenge the whole thing, spending a lot of time to get way ahead of the scavenge pacing for no reason.

With this change the scavenger should follow the pacing more closely, but may still over-scavenge by up to a physical huge page, since the splitting behavior avoids breaking up huge pages in free spans.

This change is also the culmination of the scavenging improvements, so we also include benchmark results for this series (starting from "runtime: merge all treaps into one implementation" until this patch). This patch stack results in average and peak RSS reductions (up to 11% and 7% respectively) for some benchmarks, with mostly minimal performance degradation (3-4% for some benchmarks, ~0% geomean). Each of these benchmarks was executed with GODEBUG=madvdontneed=1 on Linux; the performance degradation is even smaller when MADV_FREE may be used, but the impact on RSS is much harder to measure. Applications that generally maintain a steady heap size for the most part show no change in application performance.

These benchmarks are taken from an experimental benchmarking suite representing a variety of open-source Go packages. The raw results may be found here: https://perf.golang.org/search?q=upload:20190509.1

For #30333.

Change-Id: I618a48534d2d6ce5f656bb66825e3c383ab1ffba
Reviewed-on: https://go-review.googlesource.com/c/go/+/175797
Run-TryBot: Michael Knyszek <[email protected]>
Reviewed-by: Austin Clements <[email protected]>
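For illustration, here is a minimal, self-contained sketch of the split-point alignment described above: the split point is rounded down to a page boundary, and down to a huge-page boundary whenever it would otherwise fracture a huge page. The page-size constants and the splitBase helper are hypothetical stand-ins chosen for this example (4 KiB pages, 2 MiB huge pages); the real logic is (*mheap).scavengeSplit in the diff below.

package main

import "fmt"

// Assumed page sizes for this example only; the runtime discovers the real
// values at startup.
const (
	pageSize         = 4096    // logical (runtime) page size
	physPageSize     = 4096    // physical page size
	physHugePageSize = 2 << 20 // transparent huge page size (2 MiB)
)

// splitBase computes where a free region [start, end) would be split so that
// roughly size bytes remain at the back, without fracturing an aligned huge
// page. It returns 0 when no useful split exists.
func splitBase(start, end, size uintptr) uintptr {
	if end <= start || end-start <= size {
		return 0 // the request covers the whole region
	}
	base := end - size
	// Round down to the larger of the physical and logical page size.
	base &^= (physPageSize - 1) | (pageSize - 1)
	if base <= start {
		return 0
	}
	// If rounding down to a huge-page boundary still lands inside the region,
	// prefer it so the split never breaks a huge page apart.
	if physHugePageSize > pageSize && base&^(physHugePageSize-1) >= start {
		base &^= physHugePageSize - 1
	}
	if base <= start {
		return 0
	}
	return base
}

func main() {
	// A 16 MiB free region; only about 100 KiB is needed from its back.
	start, end := uintptr(32<<20), uintptr(48<<20)
	fmt.Printf("split at %#x\n", splitBase(start, end, 100<<10))
	// Prints "split at 0x2e00000": the split is pushed down to a 2 MiB
	// boundary, so about 2 MiB (not the whole 16 MiB) would be scavenged
	// and no huge page is broken apart.
}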
1 parent fe67ea3 commit ff70494

File tree

1 file changed: +62 −1 lines changed


src/runtime/mheap.go: +62 −1
@@ -1350,6 +1350,63 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool) {
 	h.free.insert(s)
 }
 
+// scavengeSplit takes t.span() and attempts to split off a span containing size
+// (in bytes) worth of physical pages from the back.
+//
+// The split point is only approximately defined by size since the split point
+// is aligned to physPageSize and pageSize every time. If physHugePageSize is
+// non-zero and the split point would break apart a huge page in the span, then
+// the split point is also aligned to physHugePageSize.
+//
+// If the desired split point ends up at the base of s, or if size is obviously
+// much larger than s, then a split is not possible and this method returns nil.
+// Otherwise if a split occurred it returns the newly-created span.
+func (h *mheap) scavengeSplit(t treapIter, size uintptr) *mspan {
+	s := t.span()
+	start, end := s.physPageBounds()
+	if end <= start || end-start <= size {
+		// Size covers the whole span.
+		return nil
+	}
+	// The span is bigger than what we need, so compute the base for the new
+	// span if we decide to split.
+	base := end - size
+	// Round down to the next physical or logical page, whichever is bigger.
+	base &^= (physPageSize - 1) | (pageSize - 1)
+	if base <= start {
+		return nil
+	}
+	if physHugePageSize > pageSize && base&^(physHugePageSize-1) >= start {
+		// We're in danger of breaking apart a huge page, so include the entire
+		// huge page in the bound by rounding down to the huge page size.
+		// base should still be aligned to pageSize.
+		base &^= physHugePageSize - 1
+	}
+	if base == start {
+		// After all that we rounded base down to s.base(), so no need to split.
+		return nil
+	}
+	if base < start {
+		print("runtime: base=", base, ", s.npages=", s.npages, ", s.base()=", s.base(), ", size=", size, "\n")
+		print("runtime: physPageSize=", physPageSize, ", physHugePageSize=", physHugePageSize, "\n")
+		throw("bad span split base")
+	}
+
+	// Split s in-place, removing from the back.
+	n := (*mspan)(h.spanalloc.alloc())
+	nbytes := s.base() + s.npages*pageSize - base
+	h.free.mutate(t, func(s *mspan) {
+		n.init(base, nbytes/pageSize)
+		s.npages -= nbytes / pageSize
+		h.setSpan(n.base()-1, s)
+		h.setSpan(n.base(), n)
+		h.setSpan(n.base()+nbytes-1, n)
+		n.needzero = s.needzero
+		n.state = s.state
+	})
+	return n
+}
+
 // scavengeLocked scavenges nbytes worth of spans in the free treap by
 // starting from the span with the highest base address and working down.
 // It then takes those spans and places them in scav.
@@ -1371,7 +1428,11 @@ func (h *mheap) scavengeLocked(nbytes uintptr) uintptr {
 			continue
 		}
 		n := t.prev()
-		h.free.erase(t)
+		if span := h.scavengeSplit(t, nbytes-released); span != nil {
+			s = span
+		} else {
+			h.free.erase(t)
+		}
 		released += s.scavenge()
 		// Now that s is scavenged, we must eagerly coalesce it
 		// with its neighbors to prevent having two spans with
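To make the pacing point from the commit message concrete, here is a small, hypothetical model of a scavenge loop that splits the last span instead of releasing it wholesale. The span type, splitTail, and scavenge functions are illustrative stand-ins, not runtime APIs, and the example ignores the treap, physical-page bounds, and huge-page rounding handled by the real code above.

package main

import "fmt"

const pageSize = 4096 // assumed page size for this example

// span is a simplified stand-in for the runtime's mspan.
type span struct {
	base, npages uintptr
}

// splitTail carves pages off the back of s and returns the new tail span,
// playing the role that scavengeSplit plays in the real code.
func splitTail(s *span, pages uintptr) *span {
	tail := &span{base: s.base + (s.npages-pages)*pageSize, npages: pages}
	s.npages -= pages
	return tail
}

// scavenge walks spans from the highest base address down and releases
// roughly nbytes, splitting the final span instead of releasing it wholesale.
func scavenge(spans []*span, nbytes uintptr) uintptr {
	var released uintptr
	for i := len(spans) - 1; i >= 0 && released < nbytes; i-- {
		s := spans[i]
		if need := (nbytes - released + pageSize - 1) / pageSize; need < s.npages {
			s = splitTail(s, need) // only take what the pacing still calls for
		}
		released += s.npages * pageSize
	}
	return released
}

func main() {
	spans := []*span{
		{base: 0x100000, npages: 4},    // 16 KiB span
		{base: 0x200000, npages: 1024}, // 4 MiB span, highest address
	}
	// Ask for 64 KiB: without splitting, the 4 MiB span would be released
	// in its entirety; with splitting, only 64 KiB is released.
	fmt.Println("released bytes:", scavenge(spans, 64<<10))
}

In this model a 64 KiB request against a 4 MiB span releases exactly 64 KiB instead of 4 MiB; the real scavenger may still release up to an extra physical huge page because of the huge-page rounding in scavengeSplit above.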
