Skip to content

Commit 548a546

Browse files
abner-chenc authored and pull[bot] committed
cmd/compile, internal/runtime/atomic: add Xchg8 for loong64
In Loongson's new microarchitecture LA664 (Loongson-3A6000) and later, the atomic instruction AMSWAP[DB]{B,H} [1] is supported. Therefore, the implementation of the atomic exchange operation can be selected according to the CPUCFG flag LAM_BH: the AMSWAPDBB (full barrier) instruction is used on new microarchitectures, and traditional LL-SC is used on LA464 (Loongson-3A5000) and older microarchitectures. This can significantly improve the performance of Go programs on new microarchitectures.

Because Xchg8 implemented using traditional LL-SC uses too many temporary registers, it is not suitable for intrinsics.

goos: linux
goarch: loong64
pkg: internal/runtime/atomic
cpu: Loongson-3A6000 @ 2500.00MHz
BenchmarkXchg8              100000000    10.41 ns/op
BenchmarkXchg8-2            100000000    10.41 ns/op
BenchmarkXchg8-4            100000000    10.41 ns/op
BenchmarkXchg8Parallel       96647592    12.41 ns/op
BenchmarkXchg8Parallel-2     58376136    20.60 ns/op
BenchmarkXchg8Parallel-4     78458899    17.97 ns/op

goos: linux
goarch: loong64
pkg: internal/runtime/atomic
cpu: Loongson-3A5000-HV @ 2500.00MHz
BenchmarkXchg8               38323825    31.23 ns/op
BenchmarkXchg8-2             38368219    31.23 ns/op
BenchmarkXchg8-4             37154156    31.26 ns/op
BenchmarkXchg8Parallel       37908301    31.63 ns/op
BenchmarkXchg8Parallel-2     30413440    39.42 ns/op
BenchmarkXchg8Parallel-4     30737626    39.03 ns/op

For #69735

[1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html

Change-Id: I02ba68f66a2210b6902344fdc9975eb62de728ab
Reviewed-on: https://go-review.googlesource.com/c/go/+/623058
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: sophie zhao <[email protected]>
Reviewed-by: Meidan Li <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
Reviewed-by: Mauri de Souza Meneguzzo <[email protected]>
Reviewed-by: Dmitri Shuralyov <[email protected]>
1 parent bcf0862 commit 548a546

File tree

10 files changed

+113
-1
lines changed

10 files changed

+113
-1
lines changed

src/cmd/compile/internal/loong64/ssa.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,15 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
722722
p.To.Reg = v.Args[0].Reg()
723723
p.RegTo2 = v.Reg0()
724724

725+
case ssa.OpLOONG64LoweredAtomicExchange8Variant:
726+
// AMSWAPDBB Rarg1, (Rarg0), Rout
727+
p := s.Prog(loong64.AAMSWAPDBB)
728+
p.From.Type = obj.TYPE_REG
729+
p.From.Reg = v.Args[1].Reg()
730+
p.To.Type = obj.TYPE_MEM
731+
p.To.Reg = v.Args[0].Reg()
732+
p.RegTo2 = v.Reg0()
733+
725734
case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64:
726735
// AMADDx Rarg1, (Rarg0), Rout
727736
// ADDV Rarg1, Rout, Rout

src/cmd/compile/internal/ssa/_gen/LOONG64.rules

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,7 @@
450450
(AtomicStorePtrNoWB ...) => (LoweredAtomicStore64 ...)
451451

452452
(AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...)
453+
(AtomicExchange8Variant ...) => (LoweredAtomicExchange8Variant ...)
453454

454455
(AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...)
455456

src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,11 @@ func init() {
466466
{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
467467
{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
468468

469+
// atomic exchange variant.
470+
// store arg1 to arg0. arg2=mem. returns <old content of *arg0, memory>. auxint must be zero.
471+
// AMSWAPDBB Rarg1, (Rarg0), Rout
472+
{name: "LoweredAtomicExchange8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
473+
469474
// atomic add.
470475
// *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>.
471476
{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},

src/cmd/compile/internal/ssa/opGen.go

Lines changed: 17 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/rewriteLOONG64.go

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssagen/intrinsics.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,41 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
439439
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange64, ssa.OpAtomicExchange64Variant, types.TUINT64, atomicEmitterARM64),
440440
sys.ARM64)
441441

442+
// makeAtomicXchg8GuardedIntrinsicLoong64 returns an intrinsic builder for an
// 8-bit atomic exchange that is guarded by a run-time check: the generated
// code loads the bool at ir.Syms.Loong64HasLAM_BH and, when it is true,
// performs the exchange inline with op; otherwise it falls back to an
// ordinary call of the function being intrinsified. Both paths produce a
// uint8 result that is merged in a common successor block.
makeAtomicXchg8GuardedIntrinsicLoong64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
443+
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
444+
// Take the address of the feature-flag symbol and load it as a bool.
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLAM_BH, s.sb)
445+
v := s.load(types.Types[types.TBOOL], addr)
446+
// Close the current block with a two-way branch controlled by the flag.
b := s.endBlock()
447+
b.Kind = ssa.BlockIf
448+
b.SetControl(v)
449+
bTrue := s.f.NewBlock(ssa.BlockPlain)
450+
bFalse := s.f.NewBlock(ssa.BlockPlain)
451+
bEnd := s.f.NewBlock(ssa.BlockPlain)
452+
// The first edge added is the "flag is true" successor, the second the fallback.
b.AddEdgeTo(bTrue)
453+
b.AddEdgeTo(bFalse)
454+
b.Likely = ssa.BranchLikely // most loong64 machines support the amswapdb.b
455+
456+
// We have the intrinsic - use it directly.
457+
s.startBlock(bTrue)
458+
// op yields a (uint8, mem) tuple; s.vars[n] holds the tuple only until it is
// split into the new memory state and the uint8 result on the next two lines.
s.vars[n] = s.newValue3(op, types.NewTuple(types.Types[types.TUINT8], types.TypeMem), args[0], args[1], s.mem())
459+
s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, s.vars[n])
460+
s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[types.TUINT8], s.vars[n])
461+
s.endBlock().AddEdgeTo(bEnd)
462+
463+
// Call the pure Go version.
464+
s.startBlock(bFalse)
465+
s.vars[n] = s.callResult(n, callNormal) // types.Types[TUINT8]
466+
s.endBlock().AddEdgeTo(bEnd)
467+
468+
// Merge results.
469+
s.startBlock(bEnd)
470+
// Read back the per-path value stored under n as the merged uint8 result.
return s.variable(n, types.Types[types.TUINT8])
471+
}
472+
}
473+
addF("internal/runtime/atomic", "Xchg8",
474+
makeAtomicXchg8GuardedIntrinsicLoong64(ssa.OpAtomicExchange8Variant),
475+
sys.Loong64)
476+
442477
addF("internal/runtime/atomic", "Xadd",
443478
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
444479
v := s.newValue3(ssa.OpAtomicAdd32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem())

src/cmd/compile/internal/ssagen/intrinsics_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
392392
{"loong64", "internal/runtime/atomic", "Xaddint32"}: struct{}{},
393393
{"loong64", "internal/runtime/atomic", "Xaddint64"}: struct{}{},
394394
{"loong64", "internal/runtime/atomic", "Xadduintptr"}: struct{}{},
395+
{"loong64", "internal/runtime/atomic", "Xchg8"}: struct{}{},
395396
{"loong64", "internal/runtime/atomic", "Xchg"}: struct{}{},
396397
{"loong64", "internal/runtime/atomic", "Xchg64"}: struct{}{},
397398
{"loong64", "internal/runtime/atomic", "Xchgint32"}: struct{}{},

src/internal/runtime/atomic/atomic_loong64.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ func Xadd64(ptr *uint64, delta int64) uint64
2525
//go:noescape
2626
func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
2727

28+
//go:noescape
29+
func Xchg8(ptr *uint8, new uint8) uint8
30+
2831
//go:noescape
2932
func Xchg(ptr *uint32, new uint32) uint32
3033

src/internal/runtime/atomic/atomic_loong64.s

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,44 @@ TEXT ·Xadd64(SB), NOSPLIT, $0-24
150150
MOVV R4, ret+16(FP)
151151
RET
152152

153+
// uint8 Xchg8(ptr *uint8, new uint8)
154+
// Atomically:
155+
// old := *ptr;
156+
// *ptr = new;
157+
// return old;
//
// This version uses an LL/SC retry loop on the 4-byte-aligned word that
// contains *ptr: the target byte is replaced inside that word while the
// other three bytes are written back unchanged.
158+
TEXT ·Xchg8(SB), NOSPLIT, $0-17
159+
MOVV ptr+0(FP), R4
160+
MOVBU new+8(FP), R5
161+
162+
// R6 = ((ptr & 3) * 8)
// i.e. the bit offset of the target byte within its aligned word.
163+
AND $3, R4, R6
164+
SLLV $3, R6
165+
166+
// R7 = ((0xFF) << R6) ^ (-1)
// i.e. a mask that clears the target byte and keeps the other bits.
167+
MOVV $0xFF, R8
168+
SLLV R6, R8, R7
169+
XOR $-1, R7
170+
171+
// R4 = ptr & (~3)
172+
MOVV $~3, R8
173+
AND R8, R4
174+
175+
// R5 = (new << R6)
176+
SLLV R6, R5
177+
178+
DBAR $0x14 // LoadAcquire barrier
179+
_xchg8_again:
180+
LL (R4), R8
181+
MOVV R8, R9 // backup old val
182+
// Clear the target byte in the loaded word, then merge in the shifted new value.
AND R7, R8
183+
OR R5, R8
184+
// SC overwrites R8 with its status; a zero status means the store failed, so retry.
SC R8, (R4)
185+
BEQ R8, _xchg8_again
186+
DBAR $0x12 // StoreRelease barrier
187+
// Shift the saved word so the old byte is in the low 8 bits, then store that byte as the result.
SRLV R6, R9, R9
188+
MOVBU R9, ret+16(FP)
189+
RET
190+
153191
// func Xchg(ptr *uint32, new uint32) uint32
154192
TEXT ·Xchg(SB), NOSPLIT, $0-20
155193
MOVV ptr+0(FP), R4

src/internal/runtime/atomic/xchg8_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
//go:build 386 || amd64 || arm || arm64 || ppc64 || ppc64le
5+
//go:build 386 || amd64 || arm || arm64 || loong64 || ppc64 || ppc64le
66

77
package atomic_test
88

0 commit comments

Comments
 (0)