Skip to content

Commit ebe49f9

Browse files
committed
cmd/compile: inline constant sized memclrNoHeapPointers calls on PPC64
Update the function isInlinableMemclr for ppc64 and ppc64le to enable inlining for the constant sized cases < 512.

Larger cases can use dcbz, which performs better but requires alignment checking, so it is best to continue using memclrNoHeapPointers for those cases.

Results on p10:

MemclrKnownSize1        2.07ns ± 0%  0.57ns ± 0%  -72.59%
MemclrKnownSize2        2.56ns ± 5%  0.57ns ± 0%  -77.82%
MemclrKnownSize4        5.15ns ± 0%  0.57ns ± 0%  -89.00%
MemclrKnownSize8        2.23ns ± 0%  0.57ns ± 0%  -74.57%
MemclrKnownSize16       2.23ns ± 0%  0.50ns ± 0%  -77.74%
MemclrKnownSize32       2.28ns ± 0%  0.56ns ± 0%  -75.28%
MemclrKnownSize64       2.49ns ± 0%  0.72ns ± 0%  -70.95%
MemclrKnownSize112      2.97ns ± 2%  1.14ns ± 0%  -61.72%
MemclrKnownSize128      4.64ns ± 6%  2.45ns ± 1%  -47.17%
MemclrKnownSize192      5.45ns ± 5%  2.79ns ± 0%  -48.87%
MemclrKnownSize248      4.51ns ± 0%  2.83ns ± 0%  -37.12%
MemclrKnownSize256      6.34ns ± 1%  3.58ns ± 0%  -43.53%
MemclrKnownSize512      3.64ns ± 0%  3.64ns ± 0%   -0.03%
MemclrKnownSize1024     4.73ns ± 0%  4.73ns ± 0%   +0.01%
MemclrKnownSize4096     17.1ns ± 0%  17.1ns ± 0%   +0.07%
MemclrKnownSize512KiB   2.12µs ± 0%  2.12µs ± 0%   ~ (all equal)

Change-Id: If1abf5749f4802c64523a41fe0058bd144d0ea46
Reviewed-on: https://go-review.googlesource.com/c/go/+/464340
Run-TryBot: Lynn Boger <[email protected]>
Reviewed-by: Jakub Ciolek <[email protected]>
Reviewed-by: Archana Ravindar <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
Reviewed-by: Carlos Eduardo Seo <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Than McIntosh <[email protected]>
1 parent 612c00b commit ebe49f9

File tree

4 files changed

+25
-11
lines changed

4 files changed

+25
-11
lines changed

src/cmd/compile/internal/ssa/_gen/generic.rules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2070,7 +2070,7 @@
20702070
// Turn known-size calls to memclrNoHeapPointers into a Zero.
20712071
// Note that we are using types.Types[types.TUINT8] instead of sptr.Type.Elem() - see issue 55122 and CL 431496 for more details.
20722072
(SelectN [0] call:(StaticCall {sym} sptr (Const(64|32) [c]) mem))
2073-
&& isInlinableMemclr(config)
2073+
&& isInlinableMemclr(config, int64(c))
20742074
&& isSameCall(sym, "runtime.memclrNoHeapPointers")
20752075
&& call.Uses == 1
20762076
&& clobber(call)

src/cmd/compile/internal/ssa/rewrite.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1365,10 +1365,16 @@ func zeroUpper56Bits(x *Value, depth int) bool {
13651365
return false
13661366
}
13671367

1368-
func isInlinableMemclr(c *Config) bool {
1368+
func isInlinableMemclr(c *Config, sz int64) bool {
13691369
// TODO: expand this check to allow other architectures
13701370
// see CL 454255 and issue 56997
1371-
return c.arch == "amd64" || c.arch == "arm64"
1371+
switch c.arch {
1372+
case "amd64", "arm64":
1373+
return true
1374+
case "ppc64le", "ppc64":
1375+
return sz < 512
1376+
}
1377+
return false
13721378
}
13731379

13741380
// isInlinableMemmove reports whether the given arch performs a Move of the given size

src/cmd/compile/internal/ssa/rewritegeneric.go

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/codegen/slices.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ func SliceExtensionConst(s []int) []int {
4747
// amd64:-`.*runtime\.makeslice`
4848
// amd64:-`.*runtime\.panicmakeslicelen`
4949
// amd64:"MOVUPS\tX15"
50-
// ppc64x:`.*runtime\.memclrNoHeapPointers`
50+
// ppc64x:-`.*runtime\.memclrNoHeapPointers`
5151
// ppc64x:-`.*runtime\.makeslice`
5252
// ppc64x:-`.*runtime\.panicmakeslicelen`
5353
return append(s, make([]int, 1<<2)...)
@@ -58,7 +58,7 @@ func SliceExtensionConstInt64(s []int) []int {
5858
// amd64:-`.*runtime\.makeslice`
5959
// amd64:-`.*runtime\.panicmakeslicelen`
6060
// amd64:"MOVUPS\tX15"
61-
// ppc64x:`.*runtime\.memclrNoHeapPointers`
61+
// ppc64x:-`.*runtime\.memclrNoHeapPointers`
6262
// ppc64x:-`.*runtime\.makeslice`
6363
// ppc64x:-`.*runtime\.panicmakeslicelen`
6464
return append(s, make([]int, int64(1<<2))...)
@@ -69,7 +69,7 @@ func SliceExtensionConstUint64(s []int) []int {
6969
// amd64:-`.*runtime\.makeslice`
7070
// amd64:-`.*runtime\.panicmakeslicelen`
7171
// amd64:"MOVUPS\tX15"
72-
// ppc64x:`.*runtime\.memclrNoHeapPointers`
72+
// ppc64x:-`.*runtime\.memclrNoHeapPointers`
7373
// ppc64x:-`.*runtime\.makeslice`
7474
// ppc64x:-`.*runtime\.panicmakeslicelen`
7575
return append(s, make([]int, uint64(1<<2))...)
@@ -80,12 +80,20 @@ func SliceExtensionConstUint(s []int) []int {
8080
// amd64:-`.*runtime\.makeslice`
8181
// amd64:-`.*runtime\.panicmakeslicelen`
8282
// amd64:"MOVUPS\tX15"
83-
// ppc64x:`.*runtime\.memclrNoHeapPointers`
83+
// ppc64x:-`.*runtime\.memclrNoHeapPointers`
8484
// ppc64x:-`.*runtime\.makeslice`
8585
// ppc64x:-`.*runtime\.panicmakeslicelen`
8686
return append(s, make([]int, uint(1<<2))...)
8787
}
8888

89+
// On ppc64x continue to use memclrNoHeapPointers
90+
// for sizes >= 512.
91+
func SliceExtensionConst512(s []int) []int {
92+
// amd64:-`.*runtime\.memclrNoHeapPointers`
93+
// ppc64x:`.*runtime\.memclrNoHeapPointers`
94+
return append(s, make([]int, 1<<9)...)
95+
}
96+
8997
func SliceExtensionPointer(s []*int, l int) []*int {
9098
// amd64:`.*runtime\.memclrHasPointers`
9199
// amd64:-`.*runtime\.makeslice`

0 commit comments

Comments
 (0)