Skip to content

Commit 6fd229a

Browse files
authored
[X86] Invert (and X, ~(and ~Y, Z)) back into (and X, (or Y, ~Z)) (#109215)
When `andn` is available, we should avoid switching `s &= ~(z & ~y);` into `s &= ~z | y;`. This patch turns this assembly from: ``` foo: not rcx and rsi, rdx andn rax, rsi, rdi or rcx, rdx and rax, rcx ret ``` into: ``` foo: and rsi, rdx andn rcx, rdx, rcx andn rax, rsi, rdi andn rax, rcx, rax ret ``` Fixes #108731
1 parent b9cae45 commit 6fd229a

File tree

2 files changed

+86
-0
lines changed

2 files changed

+86
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50002,6 +50002,28 @@ static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) {
5000250002
(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()));
5000350003
}
5000450004

50005+
/// Folds (and X, (or Y, ~Z)) --> (and X, ~(and ~Y, Z))
50006+
/// This undoes the inverse fold performed in InstCombine
50007+
static SDValue combineAndNotOrIntoAndNotAnd(SDNode *N, SelectionDAG &DAG) {
50008+
50009+
using namespace llvm::SDPatternMatch;
50010+
MVT VT = N->getSimpleValueType(0);
50011+
SDLoc DL(N);
50012+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
50013+
if (!TLI.hasAndNot(SDValue(N, 0)))
50014+
return SDValue();
50015+
50016+
SDValue X, Y, Z;
50017+
if (sd_match(
50018+
N, m_And(m_Value(X), m_OneUse(m_Or(m_Value(Y), m_Not(m_Value(Z)))))))
50019+
return DAG.getNode(
50020+
ISD::AND, DL, VT, X,
50021+
DAG.getNOT(DL, DAG.getNode(ISD::AND, DL, VT, DAG.getNOT(DL, Y, VT), Z),
50022+
VT));
50023+
50024+
return SDValue();
50025+
}
50026+
5000550027
// This function recognizes cases where X86 bzhi instruction can replace and
5000650028
// 'and-load' sequence.
5000750029
// In case of loading integer value from an array of constants which is defined
@@ -50493,6 +50515,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
5049350515
if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
5049450516
return R;
5049550517

50518+
if (SDValue R = combineAndNotOrIntoAndNotAnd(N, DAG))
50519+
return R;
50520+
5049650521
// fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
5049750522
// iff c2 is all/no bits mask - i.e. a select-with-zero mask.
5049850523
// TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?

llvm/test/CodeGen/X86/pr108731.ll

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,NOBMI
3+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,BMI
4+
5+
define i64 @foo(i64 %w, i64 %x, i64 %y, i64 %z) {
6+
; NOBMI-LABEL: foo:
7+
; NOBMI: # %bb.0: # %Entry
8+
; NOBMI-NEXT: movq %rcx, %rax
9+
; NOBMI-NEXT: andq %rdx, %rsi
10+
; NOBMI-NEXT: notq %rsi
11+
; NOBMI-NEXT: andq %rdi, %rsi
12+
; NOBMI-NEXT: notq %rax
13+
; NOBMI-NEXT: orq %rdx, %rax
14+
; NOBMI-NEXT: andq %rsi, %rax
15+
; NOBMI-NEXT: retq
16+
;
17+
; BMI-LABEL: foo:
18+
; BMI: # %bb.0: # %Entry
19+
; BMI-NEXT: andq %rdx, %rsi
20+
; BMI-NEXT: andnq %rdi, %rsi, %rax
21+
; BMI-NEXT: andnq %rcx, %rdx, %rcx
22+
; BMI-NEXT: andnq %rax, %rcx, %rax
23+
; BMI-NEXT: retq
24+
Entry:
25+
%and1 = and i64 %y, %x
26+
%xor1 = xor i64 %and1, -1
27+
%and2 = and i64 %xor1, %w
28+
%.not = xor i64 %z, -1
29+
%or1 = or i64 %.not, %y
30+
%and3 = and i64 %and2, %or1
31+
ret i64 %and3
32+
}
33+
34+
define <16 x i8> @fooVec(<16 x i8> %w, <16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
35+
; NOBMI-LABEL: fooVec:
36+
; NOBMI: # %bb.0: # %Entry
37+
; NOBMI-NEXT: andps %xmm2, %xmm1
38+
; NOBMI-NEXT: andnps %xmm0, %xmm1
39+
; NOBMI-NEXT: andnps %xmm3, %xmm2
40+
; NOBMI-NEXT: andnps %xmm1, %xmm2
41+
; NOBMI-NEXT: movaps %xmm2, %xmm0
42+
; NOBMI-NEXT: retq
43+
;
44+
; BMI-LABEL: fooVec:
45+
; BMI: # %bb.0: # %Entry
46+
; BMI-NEXT: vandps %xmm1, %xmm2, %xmm1
47+
; BMI-NEXT: vandnps %xmm0, %xmm1, %xmm0
48+
; BMI-NEXT: vandnps %xmm3, %xmm2, %xmm1
49+
; BMI-NEXT: vandnps %xmm0, %xmm1, %xmm0
50+
; BMI-NEXT: retq
51+
Entry:
52+
%and1 = and <16 x i8> %y, %x
53+
%xor1 = xor <16 x i8> %and1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
54+
%and2 = and <16 x i8> %xor1, %w
55+
%.not = xor <16 x i8> %z, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
56+
%or1 = or <16 x i8> %.not, %y
57+
%and3 = and <16 x i8> %and2, %or1
58+
ret <16 x i8> %and3
59+
}
60+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
61+
; CHECK: {{.*}}

0 commit comments

Comments
 (0)