Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3914,8 +3914,13 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
else
Base = Addr.getOperand(0);
Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
mvt);

// Offset is a signed i32 constant in PTX [register+offset] address mode
if (!CN->getAPIntValue().isSignedIntN(32))
return false;

Offset = CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(OpNode),
MVT::i32);
return true;
}
}
Expand Down
7 changes: 5 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5167,9 +5167,12 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
// - [areg+immoff]
// - [immAddr]

if (AM.BaseGV) {
// immoff is signed 32-bit
if (!APInt(64, AM.BaseOffs).isSignedIntN(32))
return false;

if (AM.BaseGV)
return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
}

switch (AM.Scale) {
case 0: // "r", "r+i" or "i" is allowed
Expand Down
85 changes: 85 additions & 0 deletions llvm/test/CodeGen/NVPTX/addr-mode.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 | FileCheck %s

target triple = "nvptx64-nvidia-cuda"

; An i64 GEP index of -1 over i32 elements is a byte offset of -4. The
; assertions below expect that offset to be folded into the load's address
; operand as a negative immediate ([%rd1+-4]).
define i32 @test_addr_mode_i64(ptr %x) {
; CHECK-LABEL: test_addr_mode_i64(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i64_param_0];
; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
; CHECK-NEXT: ret;
%addr = getelementptr i32, ptr %x, i64 -1
%res = load i32, ptr %addr
ret i32 %res
}

; Same as the i64 case but with an i32 GEP index of -1. The assertions below
; expect the identical folded address [%rd1+-4].
define i32 @test_addr_mode_i32(ptr %x) {
; CHECK-LABEL: test_addr_mode_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i32_param_0];
; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
; CHECK-NEXT: ret;
%addr = getelementptr i32, ptr %x, i32 -1
%res = load i32, ptr %addr
ret i32 %res
}

; Same as the i64 case but with an i16 GEP index of -1. The assertions below
; expect the identical folded address [%rd1+-4].
define i32 @test_addr_mode_i16(ptr %x) {
; CHECK-LABEL: test_addr_mode_i16(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i16_param_0];
; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
; CHECK-NEXT: ret;
%addr = getelementptr i32, ptr %x, i16 -1
%res = load i32, ptr %addr
ret i32 %res
}

; Same as the i64 case but with an i8 GEP index of -1. The assertions below
; expect the identical folded address [%rd1+-4].
define i32 @test_addr_mode_i8(ptr %x) {
; CHECK-LABEL: test_addr_mode_i8(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i8_param_0];
; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
; CHECK-NEXT: ret;
%addr = getelementptr i32, ptr %x, i8 -1
%res = load i32, ptr %addr
ret i32 %res
}

; An i64 GEP index of 4294967293 over i32 elements is a byte offset of
; 17179869172, which does not fit in a signed 32-bit value. The assertions
; below expect the offset to be materialized with a separate add.s64 and the
; load to use the plain register address [%rd2] rather than a folded immediate.
define i32 @test_addr_mode_i64_large(ptr %x) {
; CHECK-LABEL: test_addr_mode_i64_large(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i64_large_param_0];
; CHECK-NEXT: add.s64 %rd2, %rd1, 17179869172;
; CHECK-NEXT: ld.u32 %r1, [%rd2];
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
; CHECK-NEXT: ret;
%addr = getelementptr i32, ptr %x, i64 4294967293
%res = load i32, ptr %addr
ret i32 %res
}
24 changes: 24 additions & 0 deletions llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
Original file line number Diff line number Diff line change
Expand Up @@ -397,3 +397,27 @@ entry:
%ptr2 = getelementptr inbounds %struct0, ptr %ptr, i65 1, i32 3, i64 %idx, i32 1
ret ptr %ptr2
}

; Do not extract a large constant offset that cannot be folded into a PTX
; addressing mode. The index adds 536870912 (2^29); scaled by the 4-byte i32
; element size that is 2^31 bytes, one past the largest signed 32-bit value.
; The assertions below expect the add/zext/getelementptr sequence to be left
; exactly as written in the input.
define void @large_offset(ptr %out, i32 %in) {
; CHECK-LABEL: define void @large_offset(
; CHECK-SAME: ptr [[OUT:%.*]], i32 [[IN:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[TMP0]], 536870912
; CHECK-NEXT: [[IDX:%.*]] = zext nneg i32 [[ADD]] to i64
; CHECK-NEXT: [[GETELEM:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[IDX]]
; CHECK-NEXT: store i32 [[IN]], ptr [[GETELEM]], align 4
; CHECK-NEXT: ret void
;
entry:
%0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%add = add nuw nsw i32 %0, 536870912
%idx = zext nneg i32 %add to i64
%getElem = getelementptr inbounds i32, ptr %out, i64 %idx
store i32 %in, ptr %getElem, align 4
ret void
}

declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()