Skip to content

Commit c46ffdd

Browse files
committed
runtime: guard VZEROUPPER on CPU feature
In CL 219131 we inserted a VZEROUPPER instruction on darwin/amd64. The instruction is not available on pre-AVX machines. Guard it with a check of the AVX CPU feature flag. Fixes #37459. Change-Id: I9a064df277d091be4ee594eda5c7fd8ee323102b Reviewed-on: https://go-review.googlesource.com/c/go/+/221057 Run-TryBot: Cherry Zhang <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Keith Randall <[email protected]>
1 parent 089e482 commit c46ffdd

File tree

3 files changed

+17
-11
lines changed

3 files changed

+17
-11
lines changed

src/runtime/cpuflags.go

+1
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ import (
1111

1212
// Offsets into internal/cpu records for use in assembly.
1313
const (
14+
offsetX86HasAVX = unsafe.Offsetof(cpu.X86.HasAVX)
1415
offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
1516
offsetX86HasERMS = unsafe.Offsetof(cpu.X86.HasERMS)
1617
offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)

src/runtime/mkpreempt.go

+11-8
Original file line number | Diff line number | Diff line change
@@ -244,23 +244,26 @@ func genAMD64() {
244244

245245
// TODO: MXCSR register?
246246

247+
p("PUSHQ BP")
248+
p("MOVQ SP, BP")
249+
p("// Save flags before clobbering them")
250+
p("PUSHFQ")
251+
p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
252+
p("ADJSP $%d", l.stack)
253+
p("// But vet doesn't know ADJSP, so suppress vet stack checking")
254+
p("NOP SP")
255+
247256
// Apparently, the signal handling code path in darwin kernel leaves
248257
// the upper bits of Y registers in a dirty state, which causes
249258
// many SSE operations (128-bit and narrower) to become much slower.
250259
// Clear the upper bits to get to a clean state. See issue #37174.
251260
// It is safe here as Go code doesn't use the upper bits of Y registers.
252261
p("#ifdef GOOS_darwin")
262+
p("CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0")
263+
p("JE 2(PC)")
253264
p("VZEROUPPER")
254265
p("#endif")
255266

256-
p("PUSHQ BP")
257-
p("MOVQ SP, BP")
258-
p("// Save flags before clobbering them")
259-
p("PUSHFQ")
260-
p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
261-
p("ADJSP $%d", l.stack)
262-
p("// But vet doesn't know ADJSP, so suppress vet stack checking")
263-
p("NOP SP")
264267
l.save()
265268
p("CALL ·asyncPreempt2(SB)")
266269
l.restore()

src/runtime/preempt_amd64.s

+5-3
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,6 @@
44
#include "textflag.h"
55

66
TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
7-
#ifdef GOOS_darwin
8-
VZEROUPPER
9-
#endif
107
PUSHQ BP
118
MOVQ SP, BP
129
// Save flags before clobbering them
@@ -15,6 +12,11 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
1512
ADJSP $368
1613
// But vet doesn't know ADJSP, so suppress vet stack checking
1714
NOP SP
15+
#ifdef GOOS_darwin
16+
CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0
17+
JE 2(PC)
18+
VZEROUPPER
19+
#endif
1820
MOVQ AX, 0(SP)
1921
MOVQ CX, 8(SP)
2022
MOVQ DX, 16(SP)

0 commit comments

Comments
 (0)