-
Notifications
You must be signed in to change notification settings - Fork 13.6k
DAG: Preserve range metadata when load is narrowed #128144
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-amdgpu Author: None (LU-JOHN) Changes: In DAGCombiner.cpp, preserve range metadata when a load is narrowed to load the LSBs, if the original range metadata bounds can fit in the narrower type. Utilize the preserved range metadata to reduce a 64-bit shl to a 32-bit shl. Full diff: https://github.com/llvm/llvm-project/pull/128144.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bc7cdf38dbc2a..ba59bf269ba86 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14903,12 +14903,39 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
AddToWorklist(NewPtr.getNode());
SDValue Load;
- if (ExtType == ISD::NON_EXTLOAD)
- Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
- LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->getOriginalAlign(),
- LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
- else
+ if (ExtType == ISD::NON_EXTLOAD) {
+ const MDNode *OldRanges = LN0->getRanges();
+ const MDNode *NewRanges = nullptr;
+ /* If LSBs are loaded and all bounds in the OldRanges metadata fit in
+ the narrower size, preserve the range information by translating
+     to the new narrower type, NewTy */
+ if (ShAmt == 0 && OldRanges) {
+ Type *NewTy = VT.getTypeForEVT(*DAG.getContext());
+ const unsigned NumOperands = OldRanges->getNumOperands();
+ const unsigned NewWidth = NewTy->getIntegerBitWidth();
+ bool InRange = true;
+ SmallVector<Metadata *, 4> Bounds;
+ Bounds.reserve(NumOperands);
+
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ const APInt &BoundValue =
+ mdconst::extract<ConstantInt>(OldRanges->getOperand(i))->getValue();
+ if (BoundValue.getBitWidth() - BoundValue.getNumSignBits() >=
+ NewWidth) {
+ InRange = false;
+ break;
+ }
+ Bounds.push_back(ConstantAsMetadata::get(
+ ConstantInt::get(NewTy, BoundValue.trunc(NewWidth))));
+ }
+ if (InRange)
+ NewRanges = MDNode::get(*DAG.getContext(), Bounds);
+ }
+ Load = DAG.getLoad(
+ VT, DL, LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff), LN0->getOriginalAlign(),
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo(), NewRanges);
+ } else
Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
LN0->getOriginalAlign(),
diff --git a/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll b/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll
index 05430213c17d2..55bfc079cb1c4 100644
--- a/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll
@@ -21,11 +21,39 @@ define i64 @shl_metadata(i64 %arg0, ptr %arg1.ptr) {
; CHECK-LABEL: shl_metadata:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: flat_load_dword v1, v[2:3]
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_lshlrev_b32_e32 v1, v1, v0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %shift.amt = load i64, ptr %arg1.ptr, !range !0, !noundef !{}
+ %shl = shl i64 %arg0, %shift.amt
+ ret i64 %shl
+}
+
+define i64 @shl_metadata_two_ranges(i64 %arg0, ptr %arg1.ptr) {
+; CHECK-LABEL: shl_metadata_two_ranges:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: flat_load_dword v1, v[2:3]
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_lshlrev_b32_e32 v1, v1, v0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %shift.amt = load i64, ptr %arg1.ptr, !range !1, !noundef !{}
+ %shl = shl i64 %arg0, %shift.amt
+ ret i64 %shl
+}
+
+define i64 @shl_metadata_out_of_range(i64 %arg0, ptr %arg1.ptr) {
+; CHECK-LABEL: shl_metadata_out_of_range:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dword v2, v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
; CHECK-NEXT: s_setpc_b64 s[30:31]
- %shift.amt = load i64, ptr %arg1.ptr, !range !0
+ %shift.amt = load i64, ptr %arg1.ptr, !range !2, !noundef !{}
%shl = shl i64 %arg0, %shift.amt
ret i64 %shl
}
@@ -39,7 +67,7 @@ define <2 x i64> @shl_v2_metadata(<2 x i64> %arg0, ptr %arg1.ptr) {
; CHECK-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
; CHECK-NEXT: v_lshlrev_b64 v[2:3], v6, v[2:3]
; CHECK-NEXT: s_setpc_b64 s[30:31]
- %shift.amt = load <2 x i64>, ptr %arg1.ptr, !range !0
+ %shift.amt = load <2 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
%shl = shl <2 x i64> %arg0, %shift.amt
ret <2 x i64> %shl
}
@@ -55,7 +83,7 @@ define <3 x i64> @shl_v3_metadata(<3 x i64> %arg0, ptr %arg1.ptr) {
; CHECK-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1]
; CHECK-NEXT: v_lshlrev_b64 v[2:3], v10, v[2:3]
; CHECK-NEXT: s_setpc_b64 s[30:31]
- %shift.amt = load <3 x i64>, ptr %arg1.ptr, !range !0
+ %shift.amt = load <3 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
%shl = shl <3 x i64> %arg0, %shift.amt
ret <3 x i64> %shl
}
@@ -74,12 +102,14 @@ define <4 x i64> @shl_v4_metadata(<4 x i64> %arg0, ptr %arg1.ptr) {
; CHECK-NEXT: v_lshlrev_b64 v[4:5], v13, v[4:5]
; CHECK-NEXT: v_lshlrev_b64 v[6:7], v15, v[6:7]
; CHECK-NEXT: s_setpc_b64 s[30:31]
- %shift.amt = load <4 x i64>, ptr %arg1.ptr, !range !0
+ %shift.amt = load <4 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
%shl = shl <4 x i64> %arg0, %shift.amt
ret <4 x i64> %shl
}
!0 = !{i64 32, i64 64}
+!1 = !{i64 32, i64 38, i64 42, i64 48}
+!2 = !{i64 31, i64 38, i64 42, i64 48}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Test range with an "or X, 16"
|
Signed-off-by: John Lu <[email protected]>
Signed-off-by: John Lu <[email protected]>
if (ExtType == ISD::NON_EXTLOAD) { | ||
const MDNode *OldRanges = LN0->getRanges(); | ||
const MDNode *NewRanges = nullptr; | ||
/* If LSBs are loaded and all bounds in the OldRanges metadata fit in |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The core logic ideally would go in a utility function on ConstantRanges, not directly inline here. We will eventually want to share the same logic with globalisel
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Used ConstantRange::getMinSignedBits() to implement core logic.
const MDNode *OldRanges = LN0->getRanges(); | ||
const MDNode *NewRanges = nullptr; | ||
/* If LSBs are loaded and all bounds in the OldRanges metadata fit in | ||
the narrower size, preserve the range information by translating |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need new negative tests if it's not in range?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added negative test where bounds cannot fit in 32-bits.
820ac1e
to
c9b533d
Compare
Signed-off-by: John Lu <[email protected]>
Signed-off-by: John Lu <[email protected]>
Signed-off-by: John Lu <[email protected]>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we just get the ConstantRange and trunc() it? I really don't think trying to preserve the sub-ranges is worthwhile.
Signed-off-by: John Lu <[email protected]>
Make one new range to cover all previous sub-ranges. |
Signed-off-by: John Lu <[email protected]>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks reasonable to me.
Don't use getTypeForEVT, remove const Signed-off-by: John Lu <[email protected]>
The ASan builds have been failing, and I think it may be related to this DAGCombiner change (first seen in https://lab.llvm.org/buildbot/#/builders/52/builds/6343). Could you please take a look?
The ASan builds were red prior to this change but due to other patches (with different failure reports). |
We are also seeing the same assertion failure in some of our internal testing which I bisected back to this commit. I am currently working on trying to get a smaller repro. |
This has been causing a build failure on the Fuchsia toolchain clang build. I've verified that reverting this PR allows clang to once again build successfully, so I'm issuing a revert.
|
This reverts commit d8bcb53.
Reverts #128144 Breaks clang prod x64 build (seen in Fuchsia toolchain)
…" (#128948) Reverts llvm/llvm-project#128144 Breaks clang prod x64 build (seen in Fuchsia toolchain)
…#130609) Changes: Add guard to ensure truncation is strictly smaller than original size. --------- Signed-off-by: John Lu <[email protected]>
) (llvm#130609) Changes: Add guard to ensure truncation is strictly smaller than original size. --------- Signed-off-by: John Lu <[email protected]>
In DAGCombiner.cpp preserve range metadata when load is narrowed to load LSBs if original range metadata bounds can fit in the narrower type.
Utilize preserved range metadata to reduce 64-bit shl to 32-bit shl.