Skip to content

Commit 4c30818

Browse files
aclements and unclejack
authored and committed
runtime: optimize defer code
This optimizes deferproc and deferreturn in various ways. The most important optimization is that it more carefully arranges to prevent preemption or stack growth. Currently we do this by switching to the system stack on every deferproc and every deferreturn. While we need to be on the system stack for the slow path of allocating and freeing defers, in the common case we can fit in the nosplit stack. Hence, this change pushes the system stack switch down into the slow paths and makes everything now exposed to the user stack nosplit. This also eliminates the need for various acquirem/releasem pairs, since we are now preventing preemption by preventing stack split checks. As another smaller optimization, we special case the common cases of zero-sized and pointer-sized defer frames to respectively skip the copy and perform the copy in line instead of calling memmove. This speeds up the runtime defer benchmark by 42%: name old time/op new time/op delta Defer-4 75.1ns ± 1% 43.3ns ± 1% -42.31% (p=0.000 n=8+10) In reality, this speeds up defer by about 2.2X. The two benchmarks below compare a Lock/defer Unlock pair (DeferLock) with a Lock/Unlock pair (NoDeferLock). NoDeferLock establishes a baseline cost, so these two benchmarks together show that this change reduces the overhead of defer from 61.4ns to 27.9ns. name old time/op new time/op delta DeferLock-4 77.4ns ± 1% 43.9ns ± 1% -43.31% (p=0.000 n=10+10) NoDeferLock-4 16.0ns ± 0% 15.9ns ± 0% -0.39% (p=0.000 n=9+8) This also shaves 34ns off cgo calls: name old time/op new time/op delta CgoNoop-4 122ns ± 1% 88.3ns ± 1% -27.72% (p=0.000 n=8+9) Updates golang#14939, golang#16051. Change-Id: I2baa0dea378b7e4efebbee8fca919a97d5e15f38 Reviewed-on: https://go-review.googlesource.com/29656 Reviewed-by: Keith Randall <[email protected]>
1 parent 617bccf commit 4c30818

File tree

1 file changed

+80
-54
lines changed

1 file changed

+80
-54
lines changed

src/runtime/panic.go

Lines changed: 80 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package runtime
66

77
import (
88
"runtime/internal/atomic"
9+
"runtime/internal/sys"
910
"unsafe"
1011
)
1112

@@ -88,16 +89,21 @@ func deferproc(siz int32, fn *funcval) { // arguments of fn follow fn
8889
argp := uintptr(unsafe.Pointer(&fn)) + unsafe.Sizeof(fn)
8990
callerpc := getcallerpc(unsafe.Pointer(&siz))
9091

91-
systemstack(func() {
92-
d := newdefer(siz)
93-
if d._panic != nil {
94-
throw("deferproc: d.panic != nil after newdefer")
95-
}
96-
d.fn = fn
97-
d.pc = callerpc
98-
d.sp = sp
99-
memmove(add(unsafe.Pointer(d), unsafe.Sizeof(*d)), unsafe.Pointer(argp), uintptr(siz))
100-
})
92+
d := newdefer(siz)
93+
if d._panic != nil {
94+
throw("deferproc: d.panic != nil after newdefer")
95+
}
96+
d.fn = fn
97+
d.pc = callerpc
98+
d.sp = sp
99+
switch siz {
100+
case 0:
101+
// Do nothing.
102+
case sys.PtrSize:
103+
*(*uintptr)(deferArgs(d)) = *(*uintptr)(unsafe.Pointer(argp))
104+
default:
105+
memmove(deferArgs(d), unsafe.Pointer(argp), uintptr(siz))
106+
}
101107

102108
// deferproc returns 0 normally.
103109
// a deferred func that stops a panic
@@ -179,22 +185,30 @@ func init() {
179185

180186
// Allocate a Defer, usually using per-P pool.
181187
// Each defer must be released with freedefer.
182-
// Note: runs on g0 stack
188+
//
189+
// This must not grow the stack because there may be a frame without
190+
// stack map information when this is called.
191+
//
192+
//go:nosplit
183193
func newdefer(siz int32) *_defer {
184194
var d *_defer
185195
sc := deferclass(uintptr(siz))
186-
mp := acquirem()
196+
gp := getg()
187197
if sc < uintptr(len(p{}.deferpool)) {
188-
pp := mp.p.ptr()
198+
pp := gp.m.p.ptr()
189199
if len(pp.deferpool[sc]) == 0 && sched.deferpool[sc] != nil {
190-
lock(&sched.deferlock)
191-
for len(pp.deferpool[sc]) < cap(pp.deferpool[sc])/2 && sched.deferpool[sc] != nil {
192-
d := sched.deferpool[sc]
193-
sched.deferpool[sc] = d.link
194-
d.link = nil
195-
pp.deferpool[sc] = append(pp.deferpool[sc], d)
196-
}
197-
unlock(&sched.deferlock)
200+
// Take the slow path on the system stack so
201+
// we don't grow newdefer's stack.
202+
systemstack(func() {
203+
lock(&sched.deferlock)
204+
for len(pp.deferpool[sc]) < cap(pp.deferpool[sc])/2 && sched.deferpool[sc] != nil {
205+
d := sched.deferpool[sc]
206+
sched.deferpool[sc] = d.link
207+
d.link = nil
208+
pp.deferpool[sc] = append(pp.deferpool[sc], d)
209+
}
210+
unlock(&sched.deferlock)
211+
})
198212
}
199213
if n := len(pp.deferpool[sc]); n > 0 {
200214
d = pp.deferpool[sc][n-1]
@@ -204,19 +218,24 @@ func newdefer(siz int32) *_defer {
204218
}
205219
if d == nil {
206220
// Allocate new defer+args.
207-
total := roundupsize(totaldefersize(uintptr(siz)))
208-
d = (*_defer)(mallocgc(total, deferType, true))
221+
systemstack(func() {
222+
total := roundupsize(totaldefersize(uintptr(siz)))
223+
d = (*_defer)(mallocgc(total, deferType, true))
224+
})
209225
}
210226
d.siz = siz
211-
gp := mp.curg
212227
d.link = gp._defer
213228
gp._defer = d
214-
releasem(mp)
215229
return d
216230
}
217231

218232
// Free the given defer.
219233
// The defer cannot be used after this call.
234+
//
235+
// This must not grow the stack because there may be a frame without a
236+
// stack map when this is called.
237+
//
238+
//go:nosplit
220239
func freedefer(d *_defer) {
221240
if d._panic != nil {
222241
freedeferpanic()
@@ -226,31 +245,34 @@ func freedefer(d *_defer) {
226245
}
227246
sc := deferclass(uintptr(d.siz))
228247
if sc < uintptr(len(p{}.deferpool)) {
229-
mp := acquirem()
230-
pp := mp.p.ptr()
248+
pp := getg().m.p.ptr()
231249
if len(pp.deferpool[sc]) == cap(pp.deferpool[sc]) {
232250
// Transfer half of local cache to the central cache.
233-
var first, last *_defer
234-
for len(pp.deferpool[sc]) > cap(pp.deferpool[sc])/2 {
235-
n := len(pp.deferpool[sc])
236-
d := pp.deferpool[sc][n-1]
237-
pp.deferpool[sc][n-1] = nil
238-
pp.deferpool[sc] = pp.deferpool[sc][:n-1]
239-
if first == nil {
240-
first = d
241-
} else {
242-
last.link = d
251+
//
252+
// Take this slow path on the system stack so
253+
// we don't grow freedefer's stack.
254+
systemstack(func() {
255+
var first, last *_defer
256+
for len(pp.deferpool[sc]) > cap(pp.deferpool[sc])/2 {
257+
n := len(pp.deferpool[sc])
258+
d := pp.deferpool[sc][n-1]
259+
pp.deferpool[sc][n-1] = nil
260+
pp.deferpool[sc] = pp.deferpool[sc][:n-1]
261+
if first == nil {
262+
first = d
263+
} else {
264+
last.link = d
265+
}
266+
last = d
243267
}
244-
last = d
245-
}
246-
lock(&sched.deferlock)
247-
last.link = sched.deferpool[sc]
248-
sched.deferpool[sc] = first
249-
unlock(&sched.deferlock)
268+
lock(&sched.deferlock)
269+
last.link = sched.deferpool[sc]
270+
sched.deferpool[sc] = first
271+
unlock(&sched.deferlock)
272+
})
250273
}
251274
*d = _defer{}
252275
pp.deferpool[sc] = append(pp.deferpool[sc], d)
253-
releasem(mp)
254276
}
255277
}
256278

@@ -292,19 +314,23 @@ func deferreturn(arg0 uintptr) {
292314
}
293315

294316
// Moving arguments around.
295-
// Do not allow preemption here, because the garbage collector
296-
// won't know the form of the arguments until the jmpdefer can
297-
// flip the PC over to fn.
298-
mp := acquirem()
299-
memmove(unsafe.Pointer(&arg0), deferArgs(d), uintptr(d.siz))
317+
//
318+
// Everything called after this point must be recursively
319+
// nosplit because the garbage collector won't know the form
320+
// of the arguments until the jmpdefer can flip the PC over to
321+
// fn.
322+
switch d.siz {
323+
case 0:
324+
// Do nothing.
325+
case sys.PtrSize:
326+
*(*uintptr)(unsafe.Pointer(&arg0)) = *(*uintptr)(deferArgs(d))
327+
default:
328+
memmove(unsafe.Pointer(&arg0), deferArgs(d), uintptr(d.siz))
329+
}
300330
fn := d.fn
301331
d.fn = nil
302332
gp._defer = d.link
303-
// Switch to systemstack merely to save nosplit stack space.
304-
systemstack(func() {
305-
freedefer(d)
306-
})
307-
releasem(mp)
333+
freedefer(d)
308334
jmpdefer(fn, uintptr(unsafe.Pointer(&arg0)))
309335
}
310336

0 commit comments

Comments (0)