@@ -65,20 +65,7 @@ const (
65
65
//
66
66
// scavengePercent represents the portion of mutator time we're willing
67
67
// to spend on scavenging in percent.
68
- //
69
- // scavengePageLatency is a worst-case estimate (order-of-magnitude) of
70
- // the time it takes to scavenge one (regular-sized) page of memory.
71
- // scavengeHugePageLatency is the same but for huge pages.
72
- //
73
- // scavengePagePeriod is derived from scavengePercent and scavengePageLatency,
74
- // and represents the average time between scavenging one page that we're
75
- // aiming for. scavengeHugePagePeriod is the same but for huge pages.
76
- // These constants are core to the scavenge pacing algorithm.
77
- scavengePercent = 1 // 1%
78
- scavengePageLatency = 10e3 // 10µs
79
- scavengeHugePageLatency = 10e3 // 10µs
80
- scavengePagePeriod = scavengePageLatency / (scavengePercent / 100.0 )
81
- scavengeHugePagePeriod = scavengePageLatency / (scavengePercent / 100.0 )
68
+ scavengePercent = 1 // 1%
82
69
83
70
// retainExtraPercent represents the amount of memory over the heap goal
84
71
// that the scavenger should keep as a buffer space for the allocator.
@@ -113,7 +100,7 @@ func gcPaceScavenger() {
113
100
// information about the heap yet) so this is fine, and avoids a fault
114
101
// or garbage data later.
115
102
if memstats .last_next_gc == 0 {
116
- mheap_ .scavengeBytesPerNS = 0
103
+ mheap_ .scavengeGoal = ^ uint64 ( 0 )
117
104
return
118
105
}
119
106
// Compute our scavenging goal.
@@ -141,67 +128,14 @@ func gcPaceScavenger() {
141
128
// physical page.
142
129
retainedNow := heapRetained ()
143
130
144
- // If we're already below our goal or there's less than one physical page
145
- // worth of work to do, publish the goal in case it changed then disable
131
+ // If we're already below our goal, or within one page of our goal, then disable
146
132
// the background scavenger. We disable the background scavenger if there's
147
- // less than one physical page of work to do to avoid a potential divide-by-zero
148
- // in the calculations below (totalTime will be zero), and it's not worth
149
- // turning on the scavenger for less than one page of work.
133
+ // less than one physical page of work to do because it's not worth it.
150
134
if retainedNow <= retainedGoal || retainedNow - retainedGoal < uint64 (physPageSize ) {
151
- mheap_ .scavengeRetainedGoal = retainedGoal
152
- mheap_ .scavengeBytesPerNS = 0
135
+ mheap_ .scavengeGoal = ^ uint64 (0 )
153
136
return
154
137
}
155
-
156
- // Now we start to compute the total amount of work necessary and the total
157
- // amount of time we're willing to give the scavenger to complete this work.
158
- // This will involve calculating how much of the work consists of huge pages
159
- // and how much consists of regular pages since the former can let us scavenge
160
- // more memory in the same time.
161
- totalWork := retainedNow - retainedGoal
162
-
163
- // On systems without huge page support, all work is regular work.
164
- regularWork := totalWork
165
- hugeTime := uint64 (0 )
166
-
167
- // On systems where we have huge pages, we want to do as much of the
168
- // scavenging work as possible on huge pages, because the costs are the
169
- // same per page, but we can give back more more memory in a shorter
170
- // period of time.
171
- if physHugePageSize != 0 {
172
- // Start by computing the amount of free memory we have in huge pages
173
- // in total. Trivially, this is all the huge page work we need to do.
174
- hugeWork := uint64 (mheap_ .free .unscavHugePages ) << physHugePageShift
175
-
176
- // ...but it could turn out that there's more huge work to do than
177
- // total work, so cap it at total work. This might happen for very large
178
- // heaps where the additional factor of retainExtraPercent can make it so
179
- // that there are free chunks of memory larger than a huge page that we don't want
180
- // to scavenge.
181
- if hugeWork >= totalWork {
182
- hugePages := totalWork >> physHugePageShift
183
- hugeWork = hugePages << physHugePageShift
184
- }
185
- // Everything that's not huge work is regular work. At this point we
186
- // know huge work so we can calculate how much time that will take
187
- // based on scavengePageRate (which applies to pages of any size).
188
- regularWork = totalWork - hugeWork
189
- hugeTime = (hugeWork >> physHugePageShift ) * scavengeHugePagePeriod
190
- }
191
- // Finally, we can compute how much time it'll take to do the regular work
192
- // and the total time to do all the work.
193
- regularTime := regularWork / uint64 (physPageSize ) * scavengePagePeriod
194
- totalTime := hugeTime + regularTime
195
-
196
- now := nanotime ()
197
-
198
- // Update all the pacing parameters in mheap with scavenge.lock held,
199
- // so that scavenge.gen is kept in sync with the updated values.
200
- mheap_ .scavengeRetainedGoal = retainedGoal
201
- mheap_ .scavengeRetainedBasis = retainedNow
202
- mheap_ .scavengeTimeBasis = now
203
- mheap_ .scavengeBytesPerNS = float64 (totalWork ) / float64 (totalTime )
204
- mheap_ .scavengeGen ++ // increase scavenge generation
138
+ mheap_ .scavengeGoal = retainedGoal
205
139
}
206
140
207
141
// Sleep/wait state of the background scavenger.
@@ -210,18 +144,6 @@ var scavenge struct {
210
144
g * g
211
145
parked bool
212
146
timer * timer
213
-
214
- // Generation counter.
215
- //
216
- // It represents the last generation count (as defined by
217
- // mheap_.scavengeGen) checked by the scavenger and is updated
218
- // each time the scavenger checks whether it is on-pace.
219
- //
220
- // Skew between this field and mheap_.scavengeGen is used to
221
- // determine whether a new update is available.
222
- //
223
- // Protected by mheap_.lock.
224
- gen uint64
225
147
}
226
148
227
149
// wakeScavenger unparks the scavenger if necessary. It must be called
@@ -254,37 +176,24 @@ func wakeScavenger() {
254
176
// The scavenger may be woken up earlier by a pacing change, and it may not go
255
177
// to sleep at all if there's a pending pacing change.
256
178
//
257
- // Returns false if awoken early (i.e. true means a complete sleep) .
258
- func scavengeSleep (ns int64 ) bool {
179
+ // Returns the amount of time actually slept .
180
+ func scavengeSleep (ns int64 ) int64 {
259
181
lock (& scavenge .lock )
260
182
261
- // First check if there's a pending update.
262
- // If there is one, don't bother sleeping.
263
- var hasUpdate bool
264
- systemstack (func () {
265
- lock (& mheap_ .lock )
266
- hasUpdate = mheap_ .scavengeGen != scavenge .gen
267
- unlock (& mheap_ .lock )
268
- })
269
- if hasUpdate {
270
- unlock (& scavenge .lock )
271
- return false
272
- }
273
-
274
183
// Set the timer.
275
184
//
276
185
// This must happen here instead of inside gopark
277
186
// because we can't close over any variables without
278
187
// failing escape analysis.
279
- now := nanotime ()
280
- resetTimer (scavenge .timer , now + ns )
188
+ start := nanotime ()
189
+ resetTimer (scavenge .timer , start + ns )
281
190
282
191
// Mark ourself as asleep and go to sleep.
283
192
scavenge .parked = true
284
193
goparkunlock (& scavenge .lock , waitReasonSleep , traceEvGoSleep , 2 )
285
194
286
- // Return true if we completed the full sleep .
287
- return ( nanotime () - now ) >= ns
195
+ // Return how long we actually slept for .
196
+ return nanotime () - start
288
197
}
289
198
290
199
// Background scavenger.
@@ -306,111 +215,98 @@ func bgscavenge(c chan int) {
306
215
c <- 1
307
216
goparkunlock (& scavenge .lock , waitReasonGCScavengeWait , traceEvGoBlock , 1 )
308
217
309
- // Parameters for sleeping.
310
- //
311
- // If we end up doing more work than we need, we should avoid spinning
312
- // until we have more work to do: instead, we know exactly how much time
313
- // until more work will need to be done, so we sleep.
314
- //
315
- // We should avoid sleeping for less than minSleepNS because Gosched()
316
- // overheads among other things will work out better in that case.
218
+ // Exponentially-weighted moving average of the fraction of time this
219
+ // goroutine spends scavenging (that is, percent of a single CPU).
220
+ // It represents a measure of scheduling overheads which might extend
221
+ // the sleep or the critical time beyond what's expected. Assume no
222
+ // overhead to begin with.
317
223
//
318
- // There's no reason to set a maximum on sleep time because we'll always
319
- // get woken up earlier if there's any kind of update that could change
320
- // the scavenger's pacing.
321
- //
322
- // retryDelayNS tracks how much to sleep next time we fail to do any
323
- // useful work.
324
- const minSleepNS = int64 (100 * 1000 ) // 100 µs
325
-
326
- retryDelayNS := minSleepNS
224
+ // TODO(mknyszek): Consider making this based on total CPU time of the
225
+ // application (i.e. scavengePercent * GOMAXPROCS). This isn't really
226
+ // feasible now because the scavenger acquires the heap lock over the
227
+ // scavenging operation, which means scavenging effectively blocks
228
+ // allocators and isn't scalable. However, given a scalable allocator,
229
+ // it makes sense to also make the scavenger scale with it; if you're
230
+ // allocating more frequently, then presumably you're also generating
231
+ // more work for the scavenger.
232
+ const idealFraction = scavengePercent / 100.0
233
+ scavengeEWMA := float64 (idealFraction )
327
234
328
235
for {
329
236
released := uintptr (0 )
330
- park := false
331
- ttnext := int64 (0 )
237
+
238
+ // Time in scavenging critical section.
239
+ crit := int64 (0 )
332
240
333
241
// Run on the system stack since we grab the heap lock,
334
242
// and a stack growth with the heap lock means a deadlock.
335
243
systemstack (func () {
336
244
lock (& mheap_ .lock )
337
245
338
- // Update the last generation count that the scavenger has handled.
339
- scavenge .gen = mheap_ .scavengeGen
340
-
341
246
// If background scavenging is disabled or if there's no work to do just park.
342
- retained := heapRetained ()
343
- if mheap_ . scavengeBytesPerNS == 0 || retained <= mheap_ . scavengeRetainedGoal {
247
+ retained , goal := heapRetained (), mheap_ . scavengeGoal
248
+ if retained <= goal {
344
249
unlock (& mheap_ .lock )
345
- park = true
346
250
return
347
251
}
348
252
349
- // Calculate how big we want the retained heap to be
350
- // at this point in time.
351
- //
352
- // The formula is for that of a line, y = b - mx
353
- // We want y (want),
354
- // m = scavengeBytesPerNS (> 0)
355
- // x = time between scavengeTimeBasis and now
356
- // b = scavengeRetainedBasis
357
- rate := mheap_ .scavengeBytesPerNS
358
- tdist := nanotime () - mheap_ .scavengeTimeBasis
359
- rdist := uint64 (rate * float64 (tdist ))
360
- want := mheap_ .scavengeRetainedBasis - rdist
361
-
362
- // If we're above the line, scavenge to get below the
363
- // line.
364
- if retained > want {
365
- released = mheap_ .scavengeLocked (uintptr (retained - want ))
366
- }
367
- unlock (& mheap_ .lock )
253
+ // Scavenge one page, and measure the amount of time spent scavenging.
254
+ start := nanotime ()
255
+ released = mheap_ .scavengeLocked (physPageSize )
256
+ crit = nanotime () - start
368
257
369
- // If we over-scavenged a bit, calculate how much time it'll
370
- // take at the current rate for us to make that up. We definitely
371
- // won't have any work to do until at least that amount of time
372
- // passes.
373
- if released > uintptr (retained - want ) {
374
- extra := released - uintptr (retained - want )
375
- ttnext = int64 (float64 (extra ) / rate )
376
- }
258
+ unlock (& mheap_ .lock )
377
259
})
378
260
379
- if park {
380
- lock (& scavenge .lock )
381
- scavenge .parked = true
382
- goparkunlock (& scavenge .lock , waitReasonGCScavengeWait , traceEvGoBlock , 1 )
383
- continue
384
- }
385
-
386
261
if debug .gctrace > 0 {
387
262
if released > 0 {
388
- print ("scvg: " , released >> 20 , " MB released\n " )
263
+ print ("scvg: " , released >> 10 , " KB released\n " )
389
264
}
390
265
print ("scvg: inuse: " , memstats .heap_inuse >> 20 , ", idle: " , memstats .heap_idle >> 20 , ", sys: " , memstats .heap_sys >> 20 , ", released: " , memstats .heap_released >> 20 , ", consumed: " , (memstats .heap_sys - memstats .heap_released )>> 20 , " (MB)\n " )
391
266
}
392
267
393
268
if released == 0 {
394
- // If we were unable to release anything this may be because there's
395
- // no free memory available to scavenge. Go to sleep and try again.
396
- if scavengeSleep (retryDelayNS ) {
397
- // If we successfully slept through the delay, back off exponentially.
398
- retryDelayNS *= 2
399
- }
269
+ lock (& scavenge .lock )
270
+ scavenge .parked = true
271
+ goparkunlock (& scavenge .lock , waitReasonGCScavengeWait , traceEvGoBlock , 1 )
400
272
continue
401
273
}
402
- retryDelayNS = minSleepNS
403
274
404
- if ttnext > 0 && ttnext > minSleepNS {
405
- // If there's an appreciable amount of time until the next scavenging
406
- // goal, just sleep. We'll get woken up if anything changes and this
407
- // way we avoid spinning.
408
- scavengeSleep (ttnext )
409
- continue
275
+ // If we spent more than 10 ms (for example, if the OS scheduled us away, or someone
276
+ // put their machine to sleep) in the critical section, bound the time we use to
277
+ // calculate at 10 ms to avoid letting the sleep time get arbitrarily high.
278
+ const maxCrit = 10e6
279
+ if crit > maxCrit {
280
+ crit = maxCrit
281
+ }
282
+
283
+ // Compute the amount of time to sleep, assuming we want to use at most
284
+ // scavengePercent of CPU time. Take into account scheduling overheads
285
+ // that may extend the length of our sleep by multiplying by how far
286
+ // off we are from the ideal ratio. For example, if we're sleeping too
287
+ // much, then scavengeEWMA < idealFraction, so we'll adjust the sleep time
288
+ // down.
289
+ adjust := scavengeEWMA / idealFraction
290
+ sleepTime := int64 (adjust * float64 (crit ) / (scavengePercent / 100.0 ))
291
+
292
+ // Go to sleep.
293
+ slept := scavengeSleep (sleepTime )
294
+
295
+ // Compute the new ratio.
296
+ fraction := float64 (crit ) / float64 (crit + slept )
297
+
298
+ // Set a lower bound on the fraction.
299
+ // Due to OS-related anomalies we may "sleep" for an inordinate amount
300
+ // of time. Let's avoid letting the ratio get out of hand by bounding
301
+ // the sleep time we use in our EWMA.
302
// minFraction is the smallest crit/(crit+slept) ratio merged into the EWMA.
// It must be written with floating-point literals: the untyped integer
// expression 1 / 1000 truncates to 0, which would make the bound a no-op.
const minFraction = 1.0 / 1000.0
303
+ if fraction < minFraction {
304
+ fraction = minFraction
410
305
}
411
306
412
- // Give something else a chance to run, no locks are held.
413
- Gosched ()
307
+ // Update scavengeEWMA by merging in the new crit/slept ratio.
308
+ const alpha = 0.5
309
+ scavengeEWMA = alpha * fraction + (1 - alpha )* scavengeEWMA
414
310
}
415
311
}
416
312
0 commit comments