Skip to content

Commit 8377f20

Browse files
Archana R authored and laboger committed
runtime: improve equal on ppc64x/power10
Rewrite the equal asm function to use the new power10 instructions lxvl and stxvl (load and store with variable length), which simplify the comparison of the tail-end bytes. Cleaned up the code's CR register usage. On power9 and power8 the code remains unchanged. Performance for multiple sizes <= 16 improves on power10 with this change.

name      old time/op  new time/op  delta
Equal/1   5.28ns ± 0%  4.19ns ± 9%  -20.80%
Equal/2   5.30ns ± 0%  4.29ns ± 6%  -19.06%
Equal/3   5.10ns ± 5%  4.20ns ± 6%  -17.73%
Equal/4   5.05ns ± 0%  4.42ns ± 4%  -12.50%
Equal/5   5.27ns ± 1%  4.44ns ± 4%  -15.69%
Equal/6   5.30ns ± 0%  4.38ns ±12%  -17.44%
Equal/7   5.02ns ± 6%  4.48ns ± 2%  -10.64%
Equal/9   4.53ns ± 0%  4.34ns ± 7%   -4.21%
Equal/16  4.52ns ± 0%  4.29ns ± 6%   -5.16%

Change-Id: Ie124906e3a5012dfe634bfe09af06be42f1b178b
Reviewed-on: https://go-review.googlesource.com/c/go/+/473536
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
Run-TryBot: Lynn Boger <[email protected]>
Reviewed-by: Dmitri Shuralyov <[email protected]>
Reviewed-by: Paul Murphy <[email protected]>
1 parent b4ac4b4 commit 8377f20

File tree

1 file changed

+22
-20
lines changed

1 file changed

+22
-20
lines changed

src/internal/bytealg/equal_ppc64x.s

+22-20
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,6 @@
1010
// 4K (smallest case) page size offset mask for PPC64.
1111
#define PAGE_OFFSET 4095
1212

13-
// TODO: At writing, ISEL and BC do not support CR bit type arguments,
14-
// define them here for readability.
15-
#define CR0LT 4*0+0
16-
#define CR0EQ 4*0+2
17-
#define CR1LT 4*1+0
18-
#define CR6LT 4*6+0
19-
2013
// Likewise, the BC opcode is hard to read, and no extended
2114
// mnemonics are offered for these forms.
2215
#define BGELR_CR6 BC 4, CR6LT, (LR)
@@ -90,7 +83,7 @@ loop64:
9083
ADD $64,R4
9184
BDNZ loop64
9285

93-
ISEL $CR0EQ, R11, R3, R3 // If no tail, return 1, otherwise R3 remains 0.
86+
ISEL CR0EQ, R11, R3, R3 // If no tail, return 1, otherwise R3 remains 0.
9487
BEQLR // return if no tail.
9588

9689
ADD $-64, R9, R8
@@ -110,7 +103,7 @@ loop64:
110103
LXVD2X (R8+R16), V0
111104
LXVD2X (R4+R16), V1
112105
VCMPEQUBCC V0, V1, V2
113-
ISEL $CR6LT, R11, R0, R3
106+
ISEL CR6LT, R11, R0, R3
114107
RET
115108

116109
check33_64:
@@ -138,30 +131,38 @@ check17_32:
138131
LXVD2X (R8+R0), V0
139132
LXVD2X (R4+R0), V1
140133
VCMPEQUBCC V0, V1, V2
141-
ISEL $CR6LT, R11, R0, R5
134+
ISEL CR6LT, R11, R0, R5
142135

143136
// Load sX[len(sX)-16:len(sX)] and compare.
144137
ADD $-16, R9
145138
ADD $-16, R10
146139
LXVD2X (R9+R0), V0
147140
LXVD2X (R10+R0), V1
148141
VCMPEQUBCC V0, V1, V2
149-
ISEL $CR6LT, R5, R0, R3
142+
ISEL CR6LT, R5, R0, R3
150143
RET
151144

152145
check0_16:
146+
#ifdef GOPPC64_power10
147+
SLD $56, R5, R7
148+
LXVL R8, R7, V0
149+
LXVL R4, R7, V1
150+
VCMPEQUDCC V0, V1, V2
151+
ISEL CR6LT, R11, R0, R3
152+
RET
153+
#else
153154
CMP R5, $8
154155
BLT check0_7
155156
// Load sX[0:7] and compare.
156157
MOVD (R8), R6
157158
MOVD (R4), R7
158159
CMP R6, R7
159-
ISEL $CR0EQ, R11, R0, R5
160+
ISEL CR0EQ, R11, R0, R5
160161
// Load sX[len(sX)-8:len(sX)] and compare.
161162
MOVD -8(R9), R6
162163
MOVD -8(R10), R7
163164
CMP R6, R7
164-
ISEL $CR0EQ, R5, R0, R3
165+
ISEL CR0EQ, R5, R0, R3
165166
RET
166167

167168
check0_7:
@@ -183,8 +184,8 @@ check0_7:
183184
CMPU R9, R12, CR0
184185
SUB R12, R8, R6 // compute lower load address
185186
SUB R12, R4, R9
186-
ISEL $CR1LT, R8, R6, R8 // R8 = R6 < 0 ? R8 (&s1) : R6 (&s1 - (8-len))
187-
ISEL $CR0LT, R4, R9, R4 // Similar for s2
187+
ISEL CR1LT, R8, R6, R8 // R8 = R6 < 0 ? R8 (&s1) : R6 (&s1 - (8-len))
188+
ISEL CR0LT, R4, R9, R4 // Similar for s2
188189
MOVD (R8), R15
189190
MOVD (R4), R16
190191
SLD R14, R15, R7
@@ -194,12 +195,13 @@ check0_7:
194195
SRD R14, R15, R6 // Clear the lower (8-len) bytes
195196
SRD R14, R16, R9
196197
#ifdef GOARCH_ppc64le
197-
ISEL $CR1LT, R7, R6, R8 // Choose the correct len bytes to compare based on alignment
198-
ISEL $CR0LT, R17, R9, R4
198+
ISEL CR1LT, R7, R6, R8 // Choose the correct len bytes to compare based on alignment
199+
ISEL CR0LT, R17, R9, R4
199200
#else
200-
ISEL $CR1LT, R6, R7, R8
201-
ISEL $CR0LT, R9, R17, R4
201+
ISEL CR1LT, R6, R7, R8
202+
ISEL CR0LT, R9, R17, R4
202203
#endif
203204
CMP R4, R8
204-
ISEL $CR0EQ, R11, R0, R3
205+
ISEL CR0EQ, R11, R0, R3
205206
RET
207+
#endif // tail processing if !defined(GOPPC64_power10)

0 commit comments

Comments
 (0)