Commit c956250

[AArch64] prevent (shl (srl x, c1), c2) -> (and (shift x, c3)) when load
Currently, the replacement of the shift pair `(shl (srl x, c1), c2)` with an `AND` is performed by the `DAGCombiner`. However, in certain cases, such as `(shl (srl x, c1), 2)`, there is no need to transform to `AND` when the result is used as the address of a `load`. Consider the following case:

```
lsr x8, x8, #56
and x8, x8, #0xfc
ldr w0, [x2, x8]
ret
```

Here we can remove the `AND` by switching the `LDR` to the scaled addressing mode `[x2, x8, lsl #2]` and increasing the right-shift amount from 56 to 58. After the change:

```
lsr x8, x8, #58
ldr w0, [x2, x8, lsl #2]
ret
```

This patch checks whether the `(shl (srl x, c1), 2)` feeds the address of a `load`, and if so prevents the transform to `AND`.
1 parent 33d7398 commit c956250
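
For context, here is a minimal LLVM IR sketch of the shape this commit targets, modeled on the functions in the updated test file (the function name, value names, and exact body below are illustrative assumptions, not copied from the test). The `lshr` plus the 4-byte-scaled GEP lowers to the address `(add %table, (shl (srl %mul, 2), 2))`, which previously was rewritten into an `AND` mask plus an unscaled `ldr`.

```llvm
; Reduced sketch: an i32 table lookup indexed by a right-shifted product.
; The address computation is (add table, (shl (srl mul, 2), 2)); keeping the
; shl intact lets it fold into the scaled form ldr w0, [x2, x8, lsl #2].
define i32 @load32_shr2_sketch(i64 %a, i64 %b, ptr %table) {
entry:
  %mul = mul i64 %b, %a
  %shr = lshr i64 %mul, 2
  %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
  %val = load i32, ptr %arrayidx, align 4
  ret i32 %val
}
```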

2 files changed: +35 −25 lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+17
```diff
@@ -18012,6 +18012,23 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
     return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
   }
 
+  // We do not need to fold when this shifting used in specific load case:
+  // (ldr x, (add x, (shl (srl x, c1) 2)))
+  if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
+    if (auto C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+      unsigned ShlAmt = C2->getZExtValue();
+      if (auto ShouldADD = *N->use_begin();
+          ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
+        if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->use_begin())) {
+          unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
+          if ((1ULL << ShlAmt) == ByteVT &&
+              isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT()))
+            return false;
+        }
+      }
+    }
+  }
+
   return true;
 }
 
```
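
Note that the new guard only declines the fold when the `shl` amount matches the access width (`(1ULL << ShlAmt) == ByteVT`), i.e. when the shift can be absorbed into a legal scaled addressing mode. A hypothetical mismatch case where the check does not apply (not part of the test file; the name and body are made up for illustration):

```llvm
; Index is scaled by 8 (i64 element) but only 4 bytes are loaded, so the
; address is (add table, (shl (srl x, 2), 3)) and (1 << 3) != 4; the new
; check does not trigger and DAGCombiner may still fold the shifts to an AND.
define i32 @load32_gep_i64_sketch(i64 %x, ptr %table) {
entry:
  %shr = lshr i64 %x, 2
  %arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr
  %val = load i32, ptr %arrayidx, align 4
  ret i32 %val
}
```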

llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll

+18 −25
```diff
@@ -6,9 +6,8 @@ define i16 @load16_shr63(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load16_shr63:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsr x8, x8, #62
-; CHECK-NEXT:    and x8, x8, #0x2
-; CHECK-NEXT:    ldrh w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #63
+; CHECK-NEXT:    ldrh w0, [x2, x8, lsl #1]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -22,9 +21,8 @@ define i16 @load16_shr2(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load16_shr2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsr x8, x8, #1
-; CHECK-NEXT:    and x8, x8, #0x7ffffffffffffffe
-; CHECK-NEXT:    ldrh w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #2
+; CHECK-NEXT:    ldrh w0, [x2, x8, lsl #1]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -38,8 +36,8 @@ define i16 @load16_shr1(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load16_shr1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    and x8, x8, #0xfffffffffffffffe
-; CHECK-NEXT:    ldrh w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    ldrh w0, [x2, x8, lsl #1]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -53,9 +51,8 @@ define i32 @load32_shr63(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load32_shr63:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsr x8, x8, #61
-; CHECK-NEXT:    and x8, x8, #0x4
-; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #63
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -69,8 +66,8 @@ define i32 @load32_shr2(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load32_shr2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    and x8, x8, #0xfffffffffffffffc
-; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #2
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -84,9 +81,8 @@ define i32 @load32_shr1(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load32_shr1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsl x8, x8, #1
-; CHECK-NEXT:    and x8, x8, #0xfffffffffffffffc
-; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -100,9 +96,8 @@ define i64 @load64_shr63(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load64_shr63:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsr x8, x8, #60
-; CHECK-NEXT:    and x8, x8, #0x8
-; CHECK-NEXT:    ldr x0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #63
+; CHECK-NEXT:    ldr x0, [x2, x8, lsl #3]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -116,9 +111,8 @@ define i64 @load64_shr2(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load64_shr2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsl x8, x8, #1
-; CHECK-NEXT:    and x8, x8, #0xfffffffffffffff8
-; CHECK-NEXT:    ldr x0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #2
+; CHECK-NEXT:    ldr x0, [x2, x8, lsl #3]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -132,9 +126,8 @@ define i64 @load64_shr1(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load64_shr1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsl x8, x8, #2
-; CHECK-NEXT:    and x8, x8, #0xfffffffffffffff8
-; CHECK-NEXT:    ldr x0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    ldr x0, [x2, x8, lsl #3]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
```
