Skip to content

Commit 30f8074

Browse files
Mark Ryan, dr2chase
Mark Ryan
authored and committed
cmd/internal/obj/x86: prevent jumps crossing 32 byte boundaries
This commit adds a new option to the x86 assembler. If the GOAMD64 environment variable is set to alignedjumps (the default) and we're doing a 64 bit build, the assembler will make sure that neither standalone nor macro-fused jumps will end on or cross 32 byte boundaries. To achieve this, functions are aligned on 32 byte boundaries, rather than 16 bytes, and jump instructions are padded to ensure that they do not cross or end on 32 byte boundaries. Jumps are padded by adding a NOP instruction of the appropriate length before the jump.

The commit is likely to result in larger binary sizes when GOAMD64=alignedjumps. On the binaries tested so far, an increase of between 1.4% and 1.5% has been observed.

Updates #35881

Co-authored-by: David Chase <[email protected]>
Change-Id: Ief0722300bc3f987098e4fd92b22b14ad6281d91
Reviewed-on: https://go-review.googlesource.com/c/go/+/219357
Reviewed-by: Cherry Zhang <[email protected]>
Run-TryBot: Cherry Zhang <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
1 parent 8ba0e91 commit 30f8074

File tree

10 files changed

+263
-3
lines changed

10 files changed

+263
-3
lines changed

src/cmd/asm/internal/asm/endtoend_test.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,12 @@ func TestARM64Errors(t *testing.T) {
390390
}
391391

392392
func TestAMD64EndToEnd(t *testing.T) {
393-
testEndToEnd(t, "amd64", "amd64")
393+
defer func(old string) { objabi.GOAMD64 = old }(objabi.GOAMD64)
394+
for _, goamd64 := range []string{"normaljumps", "alignedjumps"} {
395+
t.Logf("GOAMD64=%s", goamd64)
396+
objabi.GOAMD64 = goamd64
397+
testEndToEnd(t, "amd64", "amd64")
398+
}
394399
}
395400

396401
func Test386Encoder(t *testing.T) {

src/cmd/dist/build.go

+11
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ var (
3131
goos string
3232
goarm string
3333
go386 string
34+
goamd64 string
3435
gomips string
3536
gomips64 string
3637
goppc64 string
@@ -151,6 +152,12 @@ func xinit() {
151152
}
152153
go386 = b
153154

155+
b = os.Getenv("GOAMD64")
156+
if b == "" {
157+
b = "alignedjumps"
158+
}
159+
goamd64 = b
160+
154161
b = os.Getenv("GOMIPS")
155162
if b == "" {
156163
b = "hardfloat"
@@ -223,6 +230,7 @@ func xinit() {
223230

224231
// For tools being invoked but also for os.ExpandEnv.
225232
os.Setenv("GO386", go386)
233+
os.Setenv("GOAMD64", goamd64)
226234
os.Setenv("GOARCH", goarch)
227235
os.Setenv("GOARM", goarm)
228236
os.Setenv("GOHOSTARCH", gohostarch)
@@ -1163,6 +1171,9 @@ func cmdenv() {
11631171
if goarch == "386" {
11641172
xprintf(format, "GO386", go386)
11651173
}
1174+
if goarch == "amd64" {
1175+
xprintf(format, "GOAMD64", goamd64)
1176+
}
11661177
if goarch == "mips" || goarch == "mipsle" {
11671178
xprintf(format, "GOMIPS", gomips)
11681179
}

src/cmd/dist/buildruntime.go

+2
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ func mkzversion(dir, file string) {
4242
//
4343
// const defaultGOROOT = <goroot>
4444
// const defaultGO386 = <go386>
45+
// const defaultGOAMD64 = <goamd64>
4546
// const defaultGOARM = <goarm>
4647
// const defaultGOMIPS = <gomips>
4748
// const defaultGOMIPS64 = <gomips64>
@@ -71,6 +72,7 @@ func mkzbootstrap(file string) {
7172
fmt.Fprintf(&buf, "import \"runtime\"\n")
7273
fmt.Fprintln(&buf)
7374
fmt.Fprintf(&buf, "const defaultGO386 = `%s`\n", go386)
75+
fmt.Fprintf(&buf, "const defaultGOAMD64 = `%s`\n", goamd64)
7476
fmt.Fprintf(&buf, "const defaultGOARM = `%s`\n", goarm)
7577
fmt.Fprintf(&buf, "const defaultGOMIPS = `%s`\n", gomips)
7678
fmt.Fprintf(&buf, "const defaultGOMIPS64 = `%s`\n", gomips64)

src/cmd/go/alldocs.go

+3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/go/internal/cfg/cfg.go

+3
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ var (
241241
// Used in envcmd.MkEnv and build ID computations.
242242
GOARM = envOr("GOARM", fmt.Sprint(objabi.GOARM))
243243
GO386 = envOr("GO386", objabi.GO386)
244+
GOAMD64 = envOr("GOAMD64", objabi.GOAMD64)
244245
GOMIPS = envOr("GOMIPS", objabi.GOMIPS)
245246
GOMIPS64 = envOr("GOMIPS64", objabi.GOMIPS64)
246247
GOPPC64 = envOr("GOPPC64", fmt.Sprintf("%s%d", "power", objabi.GOPPC64))
@@ -266,6 +267,8 @@ func GetArchEnv() (key, val string) {
266267
return "GOARM", GOARM
267268
case "386":
268269
return "GO386", GO386
270+
case "amd64":
271+
return "GOAMD64", GOAMD64
269272
case "mips", "mipsle":
270273
return "GOMIPS", GOMIPS
271274
case "mips64", "mips64le":

src/cmd/go/internal/help/helpdoc.go

+3
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,9 @@ Architecture-specific environment variables:
582582
GO386
583583
For GOARCH=386, the floating point instruction set.
584584
Valid values are 387, sse2.
585+
GOAMD64
586+
For GOARCH=amd64, jumps can optionally be aligned such that they do not end on
587+
or cross 32 byte boundaries. Valid values are alignedjumps (default), normaljumps.
585588
GOMIPS
586589
For GOARCH=mips{,le}, whether to use floating point instructions.
587590
Valid values are hardfloat (default), softfloat.

src/cmd/internal/obj/x86/asm6.go

+218-1
Original file line numberDiff line numberDiff line change
@@ -1838,14 +1838,210 @@ func fillnop(p []byte, n int) {
18381838
}
18391839
}
18401840

1841+
func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
1842+
s.Grow(int64(c) + int64(pad))
1843+
fillnop(s.P[c:], int(pad))
1844+
return c + pad
1845+
}
1846+
18411847
func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
18421848
if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
18431849
return l
18441850
}
18451851
return q
18461852
}
18471853

1854+
// If the environment variable GOAMD64=alignedjumps the assembler will ensure that
1855+
// no standalone or macro-fused jump will straddle or end on a 32 byte boundary
1856+
// by inserting NOPs before the jumps. isJump reports whether p has p.Pcond set or is a JMP, CALL, RET, DUFFCOPY or DUFFZERO.
1857+
func isJump(p *obj.Prog) bool {
1858+
return p.Pcond != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
1859+
p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
1860+
}
1861+
1862+
// lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
1863+
// jump. Otherwise, nil is returned.
1864+
func lookForJCC(p *obj.Prog) *obj.Prog {
1865+
// Skip any PCDATA, FUNCDATA or NOP instructions
1866+
var q *obj.Prog
1867+
for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
1868+
}
1869+
1870+
if q == nil || q.Pcond == nil || p.As == obj.AJMP || p.As == obj.ACALL {
1871+
return nil
1872+
}
1873+
1874+
switch q.As {
1875+
case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
1876+
AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
1877+
default:
1878+
return nil
1879+
}
1880+
1881+
return q
1882+
}
1883+
1884+
// fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
1885+
// If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
1886+
// Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
1887+
func fusedJump(p *obj.Prog) (bool, uint8) {
1888+
var fusedSize uint8
1889+
1890+
// The first instruction in a macro fused pair may be preceded by the LOCK prefix,
1891+
// or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
1892+
// need to be careful to insert any padding before the locks rather than directly after them.
1893+
1894+
if p.As == AXRELEASE || p.As == AXACQUIRE {
1895+
fusedSize += p.Isize
1896+
for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
1897+
}
1898+
if p == nil {
1899+
return false, 0
1900+
}
1901+
}
1902+
if p.As == ALOCK {
1903+
fusedSize += p.Isize
1904+
for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
1905+
}
1906+
if p == nil {
1907+
return false, 0
1908+
}
1909+
}
1910+
cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
1911+
1912+
cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
1913+
p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
1914+
1915+
testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
1916+
p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
1917+
1918+
incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
1919+
p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
1920+
1921+
if !cmpAddSub && !testAnd && !incDec {
1922+
return false, 0
1923+
}
1924+
1925+
if !incDec {
1926+
var argOne obj.AddrType
1927+
var argTwo obj.AddrType
1928+
if cmp {
1929+
argOne = p.From.Type
1930+
argTwo = p.To.Type
1931+
} else {
1932+
argOne = p.To.Type
1933+
argTwo = p.From.Type
1934+
}
1935+
if argOne == obj.TYPE_REG {
1936+
if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
1937+
return false, 0
1938+
}
1939+
} else if argOne == obj.TYPE_MEM {
1940+
if argTwo != obj.TYPE_REG {
1941+
return false, 0
1942+
}
1943+
} else {
1944+
return false, 0
1945+
}
1946+
}
1947+
1948+
fusedSize += p.Isize
1949+
jmp := lookForJCC(p)
1950+
if jmp == nil {
1951+
return false, 0
1952+
}
1953+
1954+
fusedSize += jmp.Isize
1955+
1956+
if testAnd {
1957+
return true, fusedSize
1958+
}
1959+
1960+
if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
1961+
jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
1962+
return false, 0
1963+
}
1964+
1965+
if cmpAddSub {
1966+
return true, fusedSize
1967+
}
1968+
1969+
if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
1970+
return false, 0
1971+
}
1972+
1973+
return true, fusedSize
1974+
}
1975+
1976+
type padJumpsCtx int32
1977+
1978+
func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
1979+
// Disable jump padding on 32 bit builds by setting
1980+
// padJumps to 0.
1981+
if ctxt.Arch.Family == sys.I386 {
1982+
return padJumpsCtx(0)
1983+
}
1984+
1985+
// Disable jump padding for hand written assembly code.
1986+
if ctxt.IsAsm {
1987+
return padJumpsCtx(0)
1988+
}
1989+
1990+
if objabi.GOAMD64 != "alignedjumps" {
1991+
return padJumpsCtx(0)
1992+
1993+
}
1994+
1995+
return padJumpsCtx(32)
1996+
}
1997+
1998+
// padJump detects whether the instruction being assembled is a standalone or a macro-fused
1999+
// jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
2000+
// not cross or end on a 32 byte boundary.
2001+
func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
2002+
if pjc == 0 {
2003+
return c
2004+
}
2005+
2006+
var toPad int32
2007+
fj, fjSize := fusedJump(p)
2008+
mask := int32(pjc - 1)
2009+
if fj {
2010+
if (c&mask)+int32(fjSize) >= int32(pjc) {
2011+
toPad = int32(pjc) - (c & mask)
2012+
}
2013+
} else if isJump(p) {
2014+
if (c&mask)+int32(p.Isize) >= int32(pjc) {
2015+
toPad = int32(pjc) - (c & mask)
2016+
}
2017+
}
2018+
if toPad <= 0 {
2019+
return c
2020+
}
2021+
2022+
return noppad(ctxt, s, c, toPad)
2023+
}
2024+
2025+
// reAssemble is called if an instruction's size changes during assembly. If
2026+
// it does and the instruction is a standalone or a macro-fused jump we need to
2027+
// reassemble.
2028+
func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
2029+
if pjc == 0 {
2030+
return false
2031+
}
2032+
2033+
fj, _ := fusedJump(p)
2034+
return fj || isJump(p)
2035+
}
2036+
2037+
type nopPad struct {
2038+
p *obj.Prog // Instruction before the pad
2039+
n int32 // Size of the pad
2040+
}
2041+
18482042
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
2043+
pjc := makePjcCtx(ctxt)
2044+
18492045
if s.P != nil {
18502046
return
18512047
}
@@ -1903,6 +2099,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
19032099
var n int
19042100
var c int32
19052101
errors := ctxt.Errors
2102+
var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
19062103
for {
19072104
// This loop continues while there are reasons to re-assemble
19082105
// whole block, like the presence of long forward jumps.
@@ -1913,9 +2110,13 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
19132110
s.R = s.R[:0]
19142111
s.P = s.P[:0]
19152112
c = 0
2113+
var pPrev *obj.Prog
2114+
nops = nops[:0]
19162115
for p := s.Func.Text; p != nil; p = p.Link {
2116+
c0 := c
2117+
c = pjc.padJump(ctxt, s, p, c)
19172118

1918-
if (p.Back&branchLoopHead != 0) && c&(loopAlign-1) != 0 {
2119+
if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
19192120
// pad with NOPs
19202121
v := -c & (loopAlign - 1)
19212122

@@ -1954,11 +2155,21 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
19542155
m := ab.Len()
19552156
if int(p.Isize) != m {
19562157
p.Isize = uint8(m)
2158+
if pjc.reAssemble(p) {
2159+
// We need to re-assemble here to check for jumps and fused jumps
2160+
// that span or end on 32 byte boundaries.
2161+
reAssemble = true
2162+
}
19572163
}
19582164

19592165
s.Grow(p.Pc + int64(m))
19602166
copy(s.P[p.Pc:], ab.Bytes())
2167+
// If there was padding, remember it.
2168+
if pPrev != nil && !ctxt.IsAsm && c > c0 {
2169+
nops = append(nops, nopPad{p: pPrev, n: c - c0})
2170+
}
19612171
c += int32(m)
2172+
pPrev = p
19622173
}
19632174

19642175
n++
@@ -1973,6 +2184,12 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
19732184
return
19742185
}
19752186
}
2187+
// splice padding nops into Progs
2188+
for _, n := range nops {
2189+
pp := n.p
2190+
np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
2191+
pp.Link = np
2192+
}
19762193

19772194
s.Size = int64(c)
19782195

src/cmd/internal/objabi/util.go

+10
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ var (
2525
GOARCH = envOr("GOARCH", defaultGOARCH)
2626
GOOS = envOr("GOOS", defaultGOOS)
2727
GO386 = envOr("GO386", defaultGO386)
28+
GOAMD64 = goamd64()
2829
GOARM = goarm()
2930
GOMIPS = gomips()
3031
GOMIPS64 = gomips64()
@@ -39,6 +40,15 @@ const (
3940
MachoRelocOffset = 2048 // reserve enough space for ELF relocations
4041
)
4142

43+
func goamd64() string {
44+
switch v := envOr("GOAMD64", defaultGOAMD64); v {
45+
case "normaljumps", "alignedjumps":
46+
return v
47+
}
48+
log.Fatalf("Invalid GOAMD64 value. Must be normaljumps or alignedjumps.")
49+
panic("unreachable")
50+
}
51+
4252
func goarm() int {
4353
switch v := envOr("GOARM", defaultGOARM); v {
4454
case "5":

0 commit comments

Comments
 (0)