Reland "DAG: Preserve range metadata when load is narrowed" (#128144) #130609

Merged 5 commits on Mar 13, 2025
37 changes: 31 additions & 6 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14957,12 +14957,37 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
   AddToWorklist(NewPtr.getNode());

   SDValue Load;
-  if (ExtType == ISD::NON_EXTLOAD)
-    Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
-                       LN0->getPointerInfo().getWithOffset(PtrOff),
-                       LN0->getOriginalAlign(),
-                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
-  else
+  if (ExtType == ISD::NON_EXTLOAD) {
+    const MDNode *OldRanges = LN0->getRanges();
+    const MDNode *NewRanges = nullptr;
+    // If LSBs are loaded and the truncated ConstantRange for the OldRanges
+    // metadata is not the full-set for the new width then create a NewRanges
+    // metadata for the truncated load
+    if (ShAmt == 0 && OldRanges) {
+      ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
+      unsigned BitSize = VT.getScalarSizeInBits();
+
+      // It is possible for an 8-bit extending load with 8-bit range
+      // metadata to be narrowed to an 8-bit load. This guard is necessary to
+      // ensure that truncation is strictly smaller.
+      if (CR.getBitWidth() > BitSize) {
+        ConstantRange TruncatedCR = CR.truncate(BitSize);
+        if (!TruncatedCR.isFullSet()) {
+          Metadata *Bounds[2] = {
+              ConstantAsMetadata::get(
+                  ConstantInt::get(*DAG.getContext(), TruncatedCR.getLower())),
+              ConstantAsMetadata::get(
+                  ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
+          NewRanges = MDNode::get(*DAG.getContext(), Bounds);
+        }
+      } else if (CR.getBitWidth() == BitSize)
+        NewRanges = OldRanges;
+    }
+    Load = DAG.getLoad(
+        VT, DL, LN0->getChain(), NewPtr,
+        LN0->getPointerInfo().getWithOffset(PtrOff), LN0->getOriginalAlign(),
+        LN0->getMemOperand()->getFlags(), LN0->getAAInfo(), NewRanges);
+  } else
     Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                           LN0->getOriginalAlign(),
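The snippet below is an editorial sketch, not part of the patch: it shows, in isolation, the ConstantRange behavior the new ISD::NON_EXTLOAD branch relies on. It assumes a small program compiled and linked against the LLVM libraries, and the bounds mirror the !0 = !{i64 32, i64 64} metadata from the AMDGPU test below.

// Sketch: the [32, 64) range attached to an i64 load survives narrowing to
// i32 because it still fits in 32 bits and is not the full i32 set.
#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // !0 = !{i64 32, i64 64} describes the half-open range [32, 64) at 64 bits.
  ConstantRange CR64(APInt(64, 32), APInt(64, 64));

  // Narrowing the i64 load to i32 truncates the range to 32 bits. Note that
  // ConstantRange::truncate expects a strictly smaller destination width,
  // which is what the CR.getBitWidth() > BitSize guard above protects.
  ConstantRange CR32 = CR64.truncate(32);

  outs() << "lower: " << CR32.getLower().getZExtValue() << "\n";       // 32
  outs() << "upper: " << CR32.getUpper().getZExtValue() << "\n";       // 64
  outs() << "full set: " << (CR32.isFullSet() ? "yes" : "no") << "\n"; // no
  return 0;
}

Since the truncated range is not the full i32 set, the combiner can attach it as !range metadata to the narrowed load, which is what enables the shift reductions checked in the AMDGPU test that follows.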
64 changes: 56 additions & 8 deletions llvm/test/CodeGen/AMDGPU/shl64_reduce.ll
@@ -13,23 +13,66 @@
; Test range with metadata
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; FIXME: This case should be reduced, but SelectionDAG::computeKnownBits() cannot
; determine the minimum from metadata in this case. Match current results
; for now.

define i64 @shl_metadata(i64 %arg0, ptr %arg1.ptr) {
; CHECK-LABEL: shl_metadata:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dword v1, v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_lshlrev_b32_e32 v1, v1, v0
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%shift.amt = load i64, ptr %arg1.ptr, !range !0, !noundef !{}
%shl = shl i64 %arg0, %shift.amt
ret i64 %shl
}

define i64 @shl_metadata_two_ranges(i64 %arg0, ptr %arg1.ptr) {
; CHECK-LABEL: shl_metadata_two_ranges:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dword v1, v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_lshlrev_b32_e32 v1, v1, v0
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%shift.amt = load i64, ptr %arg1.ptr, !range !1, !noundef !{}
%shl = shl i64 %arg0, %shift.amt
ret i64 %shl
}

; Known minimum is too low. Reduction must not be done.
define i64 @shl_metadata_out_of_range(i64 %arg0, ptr %arg1.ptr) {
; CHECK-LABEL: shl_metadata_out_of_range:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dword v2, v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
; CHECK-NEXT: s_setpc_b64 s[30:31]
%shift.amt = load i64, ptr %arg1.ptr, !range !2, !noundef !{}
%shl = shl i64 %arg0, %shift.amt
ret i64 %shl
}

; Bounds cannot be truncated to i32 when load is narrowed to i32.
; Reduction must not be done.
; Bounds were chosen so that if bounds were truncated to i32 the
; known minimum would be 32 and the shl would be erroneously reduced.
define i64 @shl_metadata_cant_be_narrowed_to_i32(i64 %arg0, ptr %arg1.ptr) {
; CHECK-LABEL: shl_metadata_cant_be_narrowed_to_i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dword v2, v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
; CHECK-NEXT: s_setpc_b64 s[30:31]
%shift.amt = load i64, ptr %arg1.ptr, !range !0
%shift.amt = load i64, ptr %arg1.ptr, !range !3, !noundef !{}
%shl = shl i64 %arg0, %shift.amt
ret i64 %shl
}

; FIXME: This case should be reduced
define <2 x i64> @shl_v2_metadata(<2 x i64> %arg0, ptr %arg1.ptr) {
; CHECK-LABEL: shl_v2_metadata:
; CHECK: ; %bb.0:
@@ -39,11 +82,12 @@ define <2 x i64> @shl_v2_metadata(<2 x i64> %arg0, ptr %arg1.ptr) {
; CHECK-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
; CHECK-NEXT: v_lshlrev_b64 v[2:3], v6, v[2:3]
; CHECK-NEXT: s_setpc_b64 s[30:31]
%shift.amt = load <2 x i64>, ptr %arg1.ptr, !range !0
%shift.amt = load <2 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
%shl = shl <2 x i64> %arg0, %shift.amt
ret <2 x i64> %shl
}

; FIXME: This case should be reduced
define <3 x i64> @shl_v3_metadata(<3 x i64> %arg0, ptr %arg1.ptr) {
; CHECK-LABEL: shl_v3_metadata:
; CHECK: ; %bb.0:
@@ -55,11 +99,12 @@ define <3 x i64> @shl_v3_metadata(<3 x i64> %arg0, ptr %arg1.ptr) {
; CHECK-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1]
; CHECK-NEXT: v_lshlrev_b64 v[2:3], v10, v[2:3]
; CHECK-NEXT: s_setpc_b64 s[30:31]
%shift.amt = load <3 x i64>, ptr %arg1.ptr, !range !0
%shift.amt = load <3 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
%shl = shl <3 x i64> %arg0, %shift.amt
ret <3 x i64> %shl
}

; FIXME: This case should be reduced
define <4 x i64> @shl_v4_metadata(<4 x i64> %arg0, ptr %arg1.ptr) {
; CHECK-LABEL: shl_v4_metadata:
; CHECK: ; %bb.0:
@@ -74,12 +119,15 @@ define <4 x i64> @shl_v4_metadata(<4 x i64> %arg0, ptr %arg1.ptr) {
; CHECK-NEXT: v_lshlrev_b64 v[4:5], v13, v[4:5]
; CHECK-NEXT: v_lshlrev_b64 v[6:7], v15, v[6:7]
; CHECK-NEXT: s_setpc_b64 s[30:31]
%shift.amt = load <4 x i64>, ptr %arg1.ptr, !range !0
%shift.amt = load <4 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
%shl = shl <4 x i64> %arg0, %shift.amt
ret <4 x i64> %shl
}

!0 = !{i64 32, i64 64}
!1 = !{i64 32, i64 38, i64 42, i64 48}
!2 = !{i64 31, i64 38, i64 42, i64 48}
!3 = !{i64 32, i64 38, i64 2147483680, i64 2147483681}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Test range with an "or X, 16"
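As an editorial illustration only (the narrowing actually happens on SelectionDAG nodes, not in IR), the hand-written sketch below approximates what the combine now sees for @shl_metadata on a little-endian target: the i64 load of the shift amount becomes an i32 load of its low half, and the truncated bounds ride along so the backend still knows the amount is at least 32. The function name and the metadata node !4 are made up for this sketch and are not part of the test file.

define i64 @shl_metadata_narrowed_sketch(i64 %arg0, ptr %arg1.ptr) {
  ; Low 32 bits of the original shift amount; the range [32, 64) has been
  ; truncated from i64 to i32 bounds.
  %shift.amt.lo = load i32, ptr %arg1.ptr, !range !4, !noundef !{}
  %shift.amt = zext i32 %shift.amt.lo to i64
  %shl = shl i64 %arg0, %shift.amt
  ret i64 %shl
}

!4 = !{i32 32, i32 64}

With the minimum shift amount visible again, the existing 64-bit shift reduction can fire, which is what the updated CHECK lines for @shl_metadata and @shl_metadata_two_ranges show: a 32-bit shift into the high half and a zeroed low half.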
25 changes: 25 additions & 0 deletions llvm/test/CodeGen/X86/narrow-load-metadata.ll
@@ -0,0 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
;
; This test case is reduced from RangeConstraintManager.cpp in an ASan build.
; It crashes reduceLoadWidth in DAGCombiner.cpp. Preservation of range
; metadata must ensure that ConstantRange truncation is strictly smaller.

define i8 @narrow_load_metadata(ptr %valptr) {
; CHECK-LABEL: narrow_load_metadata:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movzbl (%rdi), %eax
; CHECK-NEXT: movb %al, 4(%rdi)
; CHECK-NEXT: movl $0, (%rdi)
; CHECK-NEXT: retq
entry:
%val = load i8, ptr %valptr, align 4, !range !0, !noundef !1
%retval.sroa.1.0.insert.ext.i = zext i8 %val to i64
%retval.sroa.1.0.insert.shift.i = shl i64 %retval.sroa.1.0.insert.ext.i, 32
%coerce.val.ii = trunc i64 %retval.sroa.1.0.insert.shift.i to i40
store i40 %coerce.val.ii, ptr %valptr, align 4
ret i8 %val
}

!0 = !{i8 0, i8 2}
!1 = !{}
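To connect this reproducer back to the guard in the DAGCombiner.cpp hunk above, here is a second editorial sketch (same assumptions as the earlier snippet) of the equal-width case: the !range here is already 8 bits wide, so when the extending load is narrowed to a plain i8 load the old metadata is reused unchanged rather than truncated.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"

using namespace llvm;

int main() {
  // !0 = !{i8 0, i8 2} from the test above: [0, 2) at 8 bits.
  ConstantRange CR(APInt(8, 0), APInt(8, 2));
  unsigned BitSize = 8; // width of the narrowed, non-extending load

  // Mirrors the patched logic: truncate only when strictly narrowing,
  // otherwise keep the existing range metadata as-is. Calling
  // CR.truncate(BitSize) with an equal width is not a valid value
  // truncation, which is the failure this test guards against.
  bool ReuseOldRanges = (CR.getBitWidth() == BitSize);
  return ReuseOldRanges ? 0 : 1;
}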