-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[x86-64 BMI1] AND-NOT's should be preferred to OR-NOT's because we have andn
/vpandn
instructions
#108731
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Comments
@llvm/issue-subscribers-backend-x86 Author: Niles Salter (Validark)
Zig code:
export fn foo(w: u64, x: u64, y: u64, z: u64) u64 {
var s = w;
s &= ~(x & y);
s &= ~(z & ~y);
return s;
}
export fn fooVec(w: @<!-- -->Vector(16, u8), x: @<!-- -->Vector(16, u8), y: @<!-- -->Vector(16, u8), z: @<!-- -->Vector(16, u8)) @<!-- -->Vector(16, u8) {
var s = w;
s &= ~(x & y);
s &= ~(z & ~y);
return s;
} Compiled for Znver3: foo:
not rcx
and rsi, rdx
andn rax, rsi, rdi
or rcx, rdx
and rax, rcx
ret
fooVec:
vpand xmm1, xmm2, xmm1
vpcmpeqd xmm4, xmm4, xmm4
vpandn xmm0, xmm1, xmm0
vpxor xmm1, xmm3, xmm4
vpor xmm1, xmm1, xmm2
vpand xmm0, xmm0, xmm1
ret I expected it to be LLVM (optimized): define dso_local i64 @<!-- -->foo(i64 %0, i64 %1, i64 %2, i64 %3) local_unnamed_addr {
Entry:
%4 = and i64 %2, %1
%5 = xor i64 %4, -1
%6 = and i64 %5, %0
%.not = xor i64 %3, -1
%7 = or i64 %.not, %2
%8 = and i64 %6, %7
ret i64 %8
}
declare void @<!-- -->llvm.dbg.value(metadata, metadata, metadata) #<!-- -->1
define dso_local <16 x i8> @<!-- -->fooVec(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3) local_unnamed_addr {
Entry:
%4 = and <16 x i8> %2, %1
%5 = xor <16 x i8> %4, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%6 = and <16 x i8> %5, %0
%.not = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%7 = or <16 x i8> %.not, %2
%8 = and <16 x i8> %6, %7
ret <16 x i8> %8
} |
I've tagged this as x86 but hopefully we can handle this generically |
…9215) When `andn` is available, we should avoid switching `s &= ~(z & ~y);` into `s &= ~z | y;` This patch turns this assembly from: ``` foo: not rcx and rsi, rdx andn rax, rsi, rdi or rcx, rdx and rax, rcx ret ``` into: ``` foo: and rsi, rdx andn rcx, rdx, rcx andn rax, rsi, rdi andn rax, rcx, rax ret ``` Fixes #108731
…m#109215) When `andn` is available, we should avoid switching `s &= ~(z & ~y);` into `s &= ~z | y;` This patch turns this assembly from: ``` foo: not rcx and rsi, rdx andn rax, rsi, rdi or rcx, rdx and rax, rcx ret ``` into: ``` foo: and rsi, rdx andn rcx, rdx, rcx andn rax, rsi, rdi andn rax, rcx, rax ret ``` Fixes llvm#108731
Zig code:
Compiled for Znver3:
I expected it to be
and
+andn
+andn
+andn
for the scalar version, andvpand
+vpandn
+vpandn
+vpandn
for the vector version. However, it turnss &= ~(z & ~y);
intos &= ~z | y;
. While this makes sense on machines withoutandn
(and doesn't matter for machines which haveandn
andorn
, or justvpternlogq
), with x86-64 BMI1 we only getandn
. We never got anorn
, so this optimization hurts us.LLVM (optimized):
LLVM Godbolt link
The text was updated successfully, but these errors were encountered: