Skip to content

Commit 882ec70

Browse files
committed
cmd/compile: add indexed load+op operations to amd64
name old time/op new time/op delta LoadAdd-16 545ns ± 0% 456ns ± 0% -16.31% (p=0.000 n=10+10) Update #36468 Change-Id: I84f390d55490648fa1f58cdbc24fd74c4f1bc8c1 Reviewed-on: https://go-review.googlesource.com/c/go/+/227960 Run-TryBot: Keith Randall <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Josh Bleecher Snyder <[email protected]>
1 parent 553e003 commit 882ec70

File tree

6 files changed

+657
-5
lines changed

6 files changed

+657
-5
lines changed

src/cmd/compile/internal/amd64/ssa.go

+22
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,28 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
840840
p := s.Prog(v.Op.Asm())
841841
p.From.Type = obj.TYPE_MEM
842842
p.From.Reg = v.Args[1].Reg()
843+
gc.AddAux(&p.From, v)
844+
p.To.Type = obj.TYPE_REG
845+
p.To.Reg = v.Reg()
846+
if v.Reg() != v.Args[0].Reg() {
847+
v.Fatalf("input[0] and output not in same register %s", v.LongString())
848+
}
849+
case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8,
850+
ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
851+
ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
852+
ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
853+
ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8:
854+
p := s.Prog(v.Op.Asm())
855+
856+
r, i := v.Args[1].Reg(), v.Args[2].Reg()
857+
p.From.Type = obj.TYPE_MEM
858+
p.From.Scale = v.Op.Scale()
859+
if p.From.Scale == 1 && i == x86.REG_SP {
860+
r, i = i, r
861+
}
862+
p.From.Reg = r
863+
p.From.Index = i
864+
843865
gc.AddAux(&p.From, v)
844866
p.To.Type = obj.TYPE_REG
845867
p.To.Reg = v.Reg()
+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Copyright 2020 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package gc
6+
7+
import "testing"
8+
9+
var globl int64
10+
11+
func BenchmarkLoadAdd(b *testing.B) {
12+
x := make([]int64, 1024)
13+
y := make([]int64, 1024)
14+
for i := 0; i < b.N; i++ {
15+
var s int64
16+
for i := range x {
17+
s ^= x[i] + y[i]
18+
}
19+
globl = s
20+
}
21+
}

src/cmd/compile/internal/ssa/addressingmodes.go

+37
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,43 @@ var combine = map[[2]Op]Op{
217217
[2]Op{OpAMD64CMPQconstload, OpAMD64LEAQ1}: OpAMD64CMPQconstloadidx1,
218218
[2]Op{OpAMD64CMPQconstload, OpAMD64LEAQ8}: OpAMD64CMPQconstloadidx8,
219219

220+
[2]Op{OpAMD64ADDLload, OpAMD64ADDQ}: OpAMD64ADDLloadidx1,
221+
[2]Op{OpAMD64ADDQload, OpAMD64ADDQ}: OpAMD64ADDQloadidx1,
222+
[2]Op{OpAMD64SUBLload, OpAMD64ADDQ}: OpAMD64SUBLloadidx1,
223+
[2]Op{OpAMD64SUBQload, OpAMD64ADDQ}: OpAMD64SUBQloadidx1,
224+
[2]Op{OpAMD64ANDLload, OpAMD64ADDQ}: OpAMD64ANDLloadidx1,
225+
[2]Op{OpAMD64ANDQload, OpAMD64ADDQ}: OpAMD64ANDQloadidx1,
226+
[2]Op{OpAMD64ORLload, OpAMD64ADDQ}: OpAMD64ORLloadidx1,
227+
[2]Op{OpAMD64ORQload, OpAMD64ADDQ}: OpAMD64ORQloadidx1,
228+
[2]Op{OpAMD64XORLload, OpAMD64ADDQ}: OpAMD64XORLloadidx1,
229+
[2]Op{OpAMD64XORQload, OpAMD64ADDQ}: OpAMD64XORQloadidx1,
230+
231+
[2]Op{OpAMD64ADDLload, OpAMD64LEAQ1}: OpAMD64ADDLloadidx1,
232+
[2]Op{OpAMD64ADDLload, OpAMD64LEAQ4}: OpAMD64ADDLloadidx4,
233+
[2]Op{OpAMD64ADDLload, OpAMD64LEAQ8}: OpAMD64ADDLloadidx8,
234+
[2]Op{OpAMD64ADDQload, OpAMD64LEAQ1}: OpAMD64ADDQloadidx1,
235+
[2]Op{OpAMD64ADDQload, OpAMD64LEAQ8}: OpAMD64ADDQloadidx8,
236+
[2]Op{OpAMD64SUBLload, OpAMD64LEAQ1}: OpAMD64SUBLloadidx1,
237+
[2]Op{OpAMD64SUBLload, OpAMD64LEAQ4}: OpAMD64SUBLloadidx4,
238+
[2]Op{OpAMD64SUBLload, OpAMD64LEAQ8}: OpAMD64SUBLloadidx8,
239+
[2]Op{OpAMD64SUBQload, OpAMD64LEAQ1}: OpAMD64SUBQloadidx1,
240+
[2]Op{OpAMD64SUBQload, OpAMD64LEAQ8}: OpAMD64SUBQloadidx8,
241+
[2]Op{OpAMD64ANDLload, OpAMD64LEAQ1}: OpAMD64ANDLloadidx1,
242+
[2]Op{OpAMD64ANDLload, OpAMD64LEAQ4}: OpAMD64ANDLloadidx4,
243+
[2]Op{OpAMD64ANDLload, OpAMD64LEAQ8}: OpAMD64ANDLloadidx8,
244+
[2]Op{OpAMD64ANDQload, OpAMD64LEAQ1}: OpAMD64ANDQloadidx1,
245+
[2]Op{OpAMD64ANDQload, OpAMD64LEAQ8}: OpAMD64ANDQloadidx8,
246+
[2]Op{OpAMD64ORLload, OpAMD64LEAQ1}: OpAMD64ORLloadidx1,
247+
[2]Op{OpAMD64ORLload, OpAMD64LEAQ4}: OpAMD64ORLloadidx4,
248+
[2]Op{OpAMD64ORLload, OpAMD64LEAQ8}: OpAMD64ORLloadidx8,
249+
[2]Op{OpAMD64ORQload, OpAMD64LEAQ1}: OpAMD64ORQloadidx1,
250+
[2]Op{OpAMD64ORQload, OpAMD64LEAQ8}: OpAMD64ORQloadidx8,
251+
[2]Op{OpAMD64XORLload, OpAMD64LEAQ1}: OpAMD64XORLloadidx1,
252+
[2]Op{OpAMD64XORLload, OpAMD64LEAQ4}: OpAMD64XORLloadidx4,
253+
[2]Op{OpAMD64XORLload, OpAMD64LEAQ8}: OpAMD64XORLloadidx8,
254+
[2]Op{OpAMD64XORQload, OpAMD64LEAQ1}: OpAMD64XORQloadidx1,
255+
[2]Op{OpAMD64XORQload, OpAMD64LEAQ8}: OpAMD64XORQloadidx8,
256+
220257
// 386
221258
[2]Op{Op386MOVBload, Op386ADDL}: Op386MOVBloadidx1,
222259
[2]Op{Op386MOVWload, Op386ADDL}: Op386MOVWloadidx1,

src/cmd/compile/internal/ssa/gen/AMD64Ops.go

+31-4
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,11 @@ func init() {
136136
readflags = regInfo{inputs: nil, outputs: gponly}
137137
flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
138138

139-
gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
140-
gp21load = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly}
141-
gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
142-
gp21pax = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
139+
gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
140+
gp21load = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly}
141+
gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
142+
gp21loadidx = regInfo{inputs: []regMask{gp, gpspsb, gpsp, 0}, outputs: gponly}
143+
gp21pax = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
143144

144145
gpstore = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
145146
gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}}
@@ -409,6 +410,32 @@ func init() {
409410
{name: "XORQload", argLength: 3, reg: gp21load, asm: "XORQ", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
410411
{name: "XORLload", argLength: 3, reg: gp21load, asm: "XORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
411412

413+
{name: "ADDLloadidx1", argLength: 4, reg: gp21loadidx, asm: "ADDL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
414+
{name: "ADDLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ADDL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
415+
{name: "ADDLloadidx8", argLength: 4, reg: gp21loadidx, asm: "ADDL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
416+
{name: "ADDQloadidx1", argLength: 4, reg: gp21loadidx, asm: "ADDQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
417+
{name: "ADDQloadidx8", argLength: 4, reg: gp21loadidx, asm: "ADDQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
418+
{name: "SUBLloadidx1", argLength: 4, reg: gp21loadidx, asm: "SUBL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
419+
{name: "SUBLloadidx4", argLength: 4, reg: gp21loadidx, asm: "SUBL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
420+
{name: "SUBLloadidx8", argLength: 4, reg: gp21loadidx, asm: "SUBL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
421+
{name: "SUBQloadidx1", argLength: 4, reg: gp21loadidx, asm: "SUBQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
422+
{name: "SUBQloadidx8", argLength: 4, reg: gp21loadidx, asm: "SUBQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
423+
{name: "ANDLloadidx1", argLength: 4, reg: gp21loadidx, asm: "ANDL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
424+
{name: "ANDLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ANDL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
425+
{name: "ANDLloadidx8", argLength: 4, reg: gp21loadidx, asm: "ANDL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
426+
{name: "ANDQloadidx1", argLength: 4, reg: gp21loadidx, asm: "ANDQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
427+
{name: "ANDQloadidx8", argLength: 4, reg: gp21loadidx, asm: "ANDQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
428+
{name: "ORLloadidx1", argLength: 4, reg: gp21loadidx, asm: "ORL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
429+
{name: "ORLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ORL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
430+
{name: "ORLloadidx8", argLength: 4, reg: gp21loadidx, asm: "ORL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
431+
{name: "ORQloadidx1", argLength: 4, reg: gp21loadidx, asm: "ORQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
432+
{name: "ORQloadidx8", argLength: 4, reg: gp21loadidx, asm: "ORQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
433+
{name: "XORLloadidx1", argLength: 4, reg: gp21loadidx, asm: "XORL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
434+
{name: "XORLloadidx4", argLength: 4, reg: gp21loadidx, asm: "XORL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
435+
{name: "XORLloadidx8", argLength: 4, reg: gp21loadidx, asm: "XORL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
436+
{name: "XORQloadidx1", argLength: 4, reg: gp21loadidx, asm: "XORQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
437+
{name: "XORQloadidx8", argLength: 4, reg: gp21loadidx, asm: "XORQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
438+
412439
// direct binary-op on memory (read-modify-write)
413440
{name: "ADDQmodify", argLength: 3, reg: gpstore, asm: "ADDQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) += arg1, arg2=mem
414441
{name: "SUBQmodify", argLength: 3, reg: gpstore, asm: "SUBQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) -= arg1, arg2=mem

0 commit comments

Comments
 (0)