diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 971a128aadfdb..7e628567e71c8 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -930,8 +930,6 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
   if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
     return tryLDGLDU(N);
   }
-  unsigned int PointerSize =
-      CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
 
   SDLoc DL(N);
   SDValue Chain = N->getOperand(0);
@@ -964,37 +962,24 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
     FromType = getLdStRegType(ScalarVT);
 
   // Create the machine instruction DAG
-  SDValue N1 = N->getOperand(1);
   SDValue Offset, Base;
-  std::optional<unsigned> Opcode;
-  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
-
-  SmallVector<SDValue, 12> Ops({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
-                                getI32Imm(CodeAddrSpace, DL),
-                                getI32Imm(VecType, DL), getI32Imm(FromType, DL),
-                                getI32Imm(FromTypeWidth, DL)});
-
-  if (SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
-    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
-                             NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
-                             NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
-  } else {
-    if (PointerSize == 64) {
-      SelectADDRri64(N1.getNode(), N1, Base, Offset);
-      Opcode =
-          pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
-                          NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
-                          NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
-    } else {
-      SelectADDRri(N1.getNode(), N1, Base, Offset);
-      Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
-                               NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
-                               NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
-    }
-  }
+  SelectADDR(N->getOperand(1), Base, Offset);
+  SDValue Ops[] = {getI32Imm(Ordering, DL),
+                   getI32Imm(Scope, DL),
+                   getI32Imm(CodeAddrSpace, DL),
+                   getI32Imm(VecType, DL),
+                   getI32Imm(FromType, DL),
+                   getI32Imm(FromTypeWidth, DL),
+                   Base,
+                   Offset,
+                   Chain};
+
+  const MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
+  const std::optional<unsigned> Opcode =
+      pickOpcodeForVT(TargetVT, NVPTX::LD_i8, NVPTX::LD_i16, NVPTX::LD_i32,
+                      NVPTX::LD_i64, NVPTX::LD_f32, NVPTX::LD_f64);
   if (!Opcode)
     return false;
-  Ops.append({Base, Offset, Chain});
 
   SDNode *NVPTXLD =
       CurDAG->getMachineNode(*Opcode, DL, TargetVT, MVT::Other, Ops);
@@ -1030,8 +1015,6 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
   if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
     return tryLDGLDU(N);
   }
-  unsigned int PointerSize =
-      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
 
   SDLoc DL(N);
   SDValue Chain = N->getOperand(0);
@@ -1079,77 +1062,38 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
     FromTypeWidth = 32;
   }
 
-  SDValue Op1 = N->getOperand(1);
   SDValue Offset, Base;
-  std::optional<unsigned> Opcode;
-  SDNode *LD;
+  SelectADDR(N->getOperand(1), Base, Offset);
+  SDValue Ops[] = {getI32Imm(Ordering, DL),
+                   getI32Imm(Scope, DL),
+                   getI32Imm(CodeAddrSpace, DL),
+                   getI32Imm(VecType, DL),
+                   getI32Imm(FromType, DL),
+                   getI32Imm(FromTypeWidth, DL),
+                   Base,
+                   Offset,
+                   Chain};
 
-  SmallVector<SDValue, 12> Ops({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
-                                getI32Imm(CodeAddrSpace, DL),
-                                getI32Imm(VecType, DL), getI32Imm(FromType, DL),
-                                getI32Imm(FromTypeWidth, DL)});
-
-  if (SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
-    switch (N->getOpcode()) {
-    default:
-      return false;
-    case NVPTXISD::LoadV2:
-      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                               NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
-                               NVPTX::LDV_i32_v2_asi,
-                               NVPTX::LDV_i64_v2_asi,
-                               NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
-      break;
-    case NVPTXISD::LoadV4:
-      Opcode =
-          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
-                          NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
-                          std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
-      break;
-    }
-  } else {
-    if (PointerSize == 64) {
-      SelectADDRri64(Op1.getNode(), Op1, Base, Offset);
-      switch (N->getOpcode()) {
-      default:
-        return false;
-      case NVPTXISD::LoadV2:
-        Opcode =
-            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                            NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
-                            NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
-                            NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
-        break;
-      case NVPTXISD::LoadV4:
-        Opcode = pickOpcodeForVT(
-            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
-            NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
-            NVPTX::LDV_f32_v4_ari_64, std::nullopt);
-        break;
-      }
-    } else {
-      SelectADDRri(Op1.getNode(), Op1, Base, Offset);
-      switch (N->getOpcode()) {
-      default:
-        return false;
-      case NVPTXISD::LoadV2:
-        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
-                                 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
-                                 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
-        break;
-      case NVPTXISD::LoadV4:
-        Opcode =
-            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
-                            NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
-                            std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
-        break;
-      }
-    }
+  std::optional<unsigned> Opcode;
+  switch (N->getOpcode()) {
+  default:
+    return false;
+  case NVPTXISD::LoadV2:
+    Opcode =
+        pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2,
+                        NVPTX::LDV_i16_v2, NVPTX::LDV_i32_v2, NVPTX::LDV_i64_v2,
+                        NVPTX::LDV_f32_v2, NVPTX::LDV_f64_v2);
+    break;
+  case NVPTXISD::LoadV4:
+    Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4,
+                             NVPTX::LDV_i16_v4, NVPTX::LDV_i32_v4, std::nullopt,
+                             NVPTX::LDV_f32_v4, std::nullopt);
+    break;
   }
 
   if (!Opcode)
     return false;
-  Ops.append({Base, Offset, Chain});
-  LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
+
+  SDNode *LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
 
   MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
   CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
@@ -1196,177 +1140,60 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
   SDVTList InstVTList = CurDAG->getVTList(InstVTs);
   SDValue Chain = N->getOperand(0);
 
-  std::optional<unsigned> Opcode;
-  SDLoc DL(N);
-  SDNode *LD;
   SDValue Base, Offset;
+  SelectADDR(Op1, Base, Offset);
+  SDValue Ops[] = {Base, Offset, Chain};
 
-  if (SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
-    switch (N->getOpcode()) {
-    default:
-      return false;
-    case ISD::LOAD:
-      Opcode = pickOpcodeForVT(
-          EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8asi,
-          NVPTX::INT_PTX_LDG_GLOBAL_i16asi, NVPTX::INT_PTX_LDG_GLOBAL_i32asi,
-          NVPTX::INT_PTX_LDG_GLOBAL_i64asi, NVPTX::INT_PTX_LDG_GLOBAL_f32asi,
-          NVPTX::INT_PTX_LDG_GLOBAL_f64asi);
-      break;
-    case ISD::INTRINSIC_W_CHAIN:
-      Opcode = pickOpcodeForVT(
-          EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8asi,
-          NVPTX::INT_PTX_LDU_GLOBAL_i16asi, NVPTX::INT_PTX_LDU_GLOBAL_i32asi,
-          NVPTX::INT_PTX_LDU_GLOBAL_i64asi, NVPTX::INT_PTX_LDU_GLOBAL_f32asi,
-          NVPTX::INT_PTX_LDU_GLOBAL_f64asi);
-      break;
-    case NVPTXISD::LoadV2:
-      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                               NVPTX::INT_PTX_LDG_G_v2i8_ELE_asi,
-                               NVPTX::INT_PTX_LDG_G_v2i16_ELE_asi,
-                               NVPTX::INT_PTX_LDG_G_v2i32_ELE_asi,
-                               NVPTX::INT_PTX_LDG_G_v2i64_ELE_asi,
-                               NVPTX::INT_PTX_LDG_G_v2f32_ELE_asi,
-                               NVPTX::INT_PTX_LDG_G_v2f64_ELE_asi);
-      break;
-    case NVPTXISD::LDUV2:
-      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                               NVPTX::INT_PTX_LDU_G_v2i8_ELE_asi,
-                               NVPTX::INT_PTX_LDU_G_v2i16_ELE_asi,
-                               NVPTX::INT_PTX_LDU_G_v2i32_ELE_asi,
-                               NVPTX::INT_PTX_LDU_G_v2i64_ELE_asi,
-                               NVPTX::INT_PTX_LDU_G_v2f32_ELE_asi,
-                               NVPTX::INT_PTX_LDU_G_v2f64_ELE_asi);
-      break;
-    case NVPTXISD::LoadV4:
-      Opcode = pickOpcodeForVT(
-          EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_asi,
-          NVPTX::INT_PTX_LDG_G_v4i16_ELE_asi,
-          NVPTX::INT_PTX_LDG_G_v4i32_ELE_asi, std::nullopt,
-          NVPTX::INT_PTX_LDG_G_v4f32_ELE_asi, std::nullopt);
-      break;
-    case NVPTXISD::LDUV4:
-      Opcode = pickOpcodeForVT(
-          EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_asi,
-          NVPTX::INT_PTX_LDU_G_v4i16_ELE_asi,
-          NVPTX::INT_PTX_LDU_G_v4i32_ELE_asi, std::nullopt,
-          NVPTX::INT_PTX_LDU_G_v4f32_ELE_asi, std::nullopt);
-      break;
-    }
-  } else {
-    if (TM.is64Bit()) {
-      SelectADDRri64(Op1.getNode(), Op1, Base, Offset);
-      switch (N->getOpcode()) {
-      default:
-        return false;
-      case ISD::LOAD:
-        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
-                                 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
-                                 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
-                                 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
-                                 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
-                                 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
-        break;
-      case ISD::INTRINSIC_W_CHAIN:
-        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
-                                 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
-                                 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
-                                 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
-                                 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
-                                 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
-        break;
-      case NVPTXISD::LoadV2:
-        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
-                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
-                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
-                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
-                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
-                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
-        break;
-      case NVPTXISD::LDUV2:
-        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
-                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
-                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
-                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
-                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
-                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
-        break;
-      case NVPTXISD::LoadV4:
-        Opcode = pickOpcodeForVT(
-            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
-            NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
-            NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
-            NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
-        break;
-      case NVPTXISD::LDUV4:
-        Opcode = pickOpcodeForVT(
-            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
-            NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
-            NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
-            NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
-        break;
-      }
-    } else {
-      SelectADDRri(Op1.getNode(), Op1, Base, Offset);
-      switch (N->getOpcode()) {
-      default:
-        return false;
-      case ISD::LOAD:
-        Opcode = pickOpcodeForVT(
-            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
-            NVPTX::INT_PTX_LDG_GLOBAL_i16ari, NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
-            NVPTX::INT_PTX_LDG_GLOBAL_i64ari, NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
-            NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
-        break;
-      case ISD::INTRINSIC_W_CHAIN:
-        Opcode = pickOpcodeForVT(
-            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
-            NVPTX::INT_PTX_LDU_GLOBAL_i16ari, NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
-            NVPTX::INT_PTX_LDU_GLOBAL_i64ari, NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
-            NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
-        break;
-      case NVPTXISD::LoadV2:
-        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
-                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
-                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
-                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
-                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
-                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
-        break;
-      case NVPTXISD::LDUV2:
-        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
-                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
-                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
-                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
-                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
-                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
-        break;
-      case NVPTXISD::LoadV4:
-        Opcode = pickOpcodeForVT(
-            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
-            NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
-            NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
-            NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
-        break;
-      case NVPTXISD::LDUV4:
-        Opcode = pickOpcodeForVT(
-            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
-            NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
-            NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
-            NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
-        break;
-      }
-    }
+  std::optional<unsigned> Opcode;
+  switch (N->getOpcode()) {
+  default:
+    return false;
+  case ISD::LOAD:
+    Opcode = pickOpcodeForVT(
+        EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8,
+        NVPTX::INT_PTX_LDG_GLOBAL_i16, NVPTX::INT_PTX_LDG_GLOBAL_i32,
+        NVPTX::INT_PTX_LDG_GLOBAL_i64, NVPTX::INT_PTX_LDG_GLOBAL_f32,
+        NVPTX::INT_PTX_LDG_GLOBAL_f64);
+    break;
+  case ISD::INTRINSIC_W_CHAIN:
+    Opcode = pickOpcodeForVT(
+        EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8,
+        NVPTX::INT_PTX_LDU_GLOBAL_i16, NVPTX::INT_PTX_LDU_GLOBAL_i32,
+        NVPTX::INT_PTX_LDU_GLOBAL_i64, NVPTX::INT_PTX_LDU_GLOBAL_f32,
+        NVPTX::INT_PTX_LDU_GLOBAL_f64);
+    break;
+  case NVPTXISD::LoadV2:
+    Opcode = pickOpcodeForVT(
+        EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE,
+        NVPTX::INT_PTX_LDG_G_v2i16_ELE, NVPTX::INT_PTX_LDG_G_v2i32_ELE,
+        NVPTX::INT_PTX_LDG_G_v2i64_ELE, NVPTX::INT_PTX_LDG_G_v2f32_ELE,
+        NVPTX::INT_PTX_LDG_G_v2f64_ELE);
+    break;
+  case NVPTXISD::LDUV2:
+    Opcode = pickOpcodeForVT(
+        EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v2i8_ELE,
+        NVPTX::INT_PTX_LDU_G_v2i16_ELE, NVPTX::INT_PTX_LDU_G_v2i32_ELE,
+        NVPTX::INT_PTX_LDU_G_v2i64_ELE, NVPTX::INT_PTX_LDU_G_v2f32_ELE,
+        NVPTX::INT_PTX_LDU_G_v2f64_ELE);
+    break;
+  case NVPTXISD::LoadV4:
+    Opcode = pickOpcodeForVT(
+        EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE,
+        NVPTX::INT_PTX_LDG_G_v4i16_ELE, NVPTX::INT_PTX_LDG_G_v4i32_ELE,
+        std::nullopt, NVPTX::INT_PTX_LDG_G_v4f32_ELE, std::nullopt);
+    break;
+  case NVPTXISD::LDUV4:
+    Opcode = pickOpcodeForVT(
+        EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE,
+        NVPTX::INT_PTX_LDU_G_v4i16_ELE, NVPTX::INT_PTX_LDU_G_v4i32_ELE,
+        std::nullopt, NVPTX::INT_PTX_LDU_G_v4f32_ELE, std::nullopt);
+    break;
   }
 
   if (!Opcode)
     return false;
-  SDValue Ops[] = {Base, Offset, Chain};
-  LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
+
+  SDLoc DL(N);
+  SDNode *LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
 
   // For automatic generation of LDG (through SelectLoad[Vector], not the
   // intrinsics), we may have an extending load like:
@@ -1424,8 +1251,6 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
 
   // Address Space Setting
   unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
-  unsigned int PointerSize =
-      CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());
 
   SDLoc DL(N);
   SDValue Chain = ST->getChain();
@@ -1450,38 +1275,28 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
 
   // Create the machine instruction DAG
   SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
-  SDValue BasePtr = ST->getBasePtr();
+
   SDValue Offset, Base;
-  std::optional<unsigned> Opcode;
-  MVT::SimpleValueType SourceVT =
+  SelectADDR(ST->getBasePtr(), Base, Offset);
+
+  SDValue Ops[] = {Value,
+                   getI32Imm(Ordering, DL),
+                   getI32Imm(Scope, DL),
+                   getI32Imm(CodeAddrSpace, DL),
+                   getI32Imm(VecType, DL),
+                   getI32Imm(ToType, DL),
+                   getI32Imm(ToTypeWidth, DL),
+                   Base,
+                   Offset,
+                   Chain};
+
+  const MVT::SimpleValueType SourceVT =
       Value.getNode()->getSimpleValueType(0).SimpleTy;
-
-  SmallVector<SDValue, 12> Ops(
-      {Value, getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
-       getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
-       getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});
-
-  if (SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
-    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
-                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
-                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
-  } else {
-    if (PointerSize == 64) {
-      SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset);
-      Opcode =
-          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
-                          NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
-                          NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
-    } else {
-      SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset);
-      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
-                               NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
-                               NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
-    }
-  }
+  const std::optional<unsigned> Opcode =
+      pickOpcodeForVT(SourceVT, NVPTX::ST_i8, NVPTX::ST_i16, NVPTX::ST_i32,
+                      NVPTX::ST_i64, NVPTX::ST_f32, NVPTX::ST_f64);
   if (!Opcode)
     return false;
-  Ops.append({Base, Offset, Chain});
 
   SDNode *NVPTXST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
@@ -1496,9 +1311,6 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
 
 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
   SDValue Op1 = N->getOperand(1);
-  SDValue Offset, Base;
-  std::optional<unsigned> Opcode;
-  SDNode *ST;
   EVT EltVT = Op1.getValueType();
   MemSDNode *MemSD = cast<MemSDNode>(N);
   EVT StoreVT = MemSD->getMemoryVT();
@@ -1509,8 +1321,6 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
     report_fatal_error("Cannot store to pointer that points to constant "
                        "memory space");
   }
-  unsigned int PointerSize =
-      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
 
   SDLoc DL(N);
   SDValue Chain = N->getOperand(0);
@@ -1549,72 +1359,35 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
     ToTypeWidth = 32;
   }
 
+  SDValue Offset, Base;
+  SelectADDR(N2, Base, Offset);
+
   Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
               getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
-              getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});
+              getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL), Base, Offset,
+              Chain});
 
-  if (SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
-    switch (N->getOpcode()) {
-    default:
-      return false;
-    case NVPTXISD::StoreV2:
-      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                               NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
-                               NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
-                               NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
-      break;
-    case NVPTXISD::StoreV4:
-      Opcode =
-          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                          NVPTX::STV_i8_v4_asi,
-                          NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
-                          std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
-      break;
-    }
-  } else {
-    if (PointerSize == 64) {
-      SelectADDRri64(N2.getNode(), N2, Base, Offset);
-      switch (N->getOpcode()) {
-      default:
-        return false;
-      case NVPTXISD::StoreV2:
-        Opcode =
-            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                            NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
-                            NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
-                            NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
-        break;
-      case NVPTXISD::StoreV4:
-        Opcode = pickOpcodeForVT(
-            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
-            NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
-            NVPTX::STV_f32_v4_ari_64, std::nullopt);
-        break;
-      }
-    } else {
-      SelectADDRri(N2.getNode(), N2, Base, Offset);
-      switch (N->getOpcode()) {
-      default:
-        return false;
-      case NVPTXISD::StoreV2:
-        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
-                                 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
-                                 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
-        break;
-      case NVPTXISD::StoreV4:
-        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                 NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
-                                 NVPTX::STV_i32_v4_ari, std::nullopt,
-                                 NVPTX::STV_f32_v4_ari, std::nullopt);
-        break;
-      }
-    }
+  std::optional<unsigned> Opcode;
+  switch (N->getOpcode()) {
+  default:
+    return false;
+  case NVPTXISD::StoreV2:
+    Opcode =
+        pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2,
+                        NVPTX::STV_i16_v2, NVPTX::STV_i32_v2, NVPTX::STV_i64_v2,
+                        NVPTX::STV_f32_v2, NVPTX::STV_f64_v2);
+    break;
+  case NVPTXISD::StoreV4:
+    Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4,
+                             NVPTX::STV_i16_v4, NVPTX::STV_i32_v4, std::nullopt,
+                             NVPTX::STV_f32_v4, std::nullopt);
+    break;
   }
+
   if (!Opcode)
     return false;
-  Ops.append({Base, Offset, Chain});
 
-  ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
+  SDNode *ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
 
   MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
   CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});
@@ -2265,27 +2038,28 @@ static inline bool isAddLike(const SDValue V) {
          (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
 }
 
-// SelectDirectAddr - Match a direct address for DAG.
-// A direct address could be a globaladdress or externalsymbol.
-bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
+// selectBaseADDR - Match a dag node which will serve as the base address for
+// an ADDR operand pair.
+static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
   // Return true if TGA or ES.
   if (N.getOpcode() == ISD::TargetGlobalAddress ||
-      N.getOpcode() == ISD::TargetExternalSymbol) {
-    Address = N;
-    return true;
-  }
-  if (N.getOpcode() == NVPTXISD::Wrapper) {
-    Address = N.getOperand(0);
-    return true;
-  }
+      N.getOpcode() == ISD::TargetExternalSymbol)
+    return N;
+
+  if (N.getOpcode() == NVPTXISD::Wrapper)
+    return N.getOperand(0);
+
   // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
-  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
+  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N))
     if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
         CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
        CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
-      return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
-  }
-  return false;
+      return selectBaseADDR(CastN->getOperand(0).getOperand(0), DAG);
+
+  if (auto *FIN = dyn_cast<FrameIndexSDNode>(N))
+    return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0));
+
+  return N;
 }
 
 static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
@@ -2306,37 +2080,17 @@ static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
                                    MVT::i32);
 }
 
-// symbol+offset
-bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
-                                     SDValue &Base, SDValue &Offset) {
-  Offset = accumulateOffset(Addr, SDLoc(OpNode), CurDAG);
-  return SelectDirectAddr(Addr, Base);
-}
-
-// register+offset
-void NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
-                                         SDValue &Base, SDValue &Offset,
-                                         MVT VT) {
-
-  Offset = accumulateOffset(Addr, SDLoc(OpNode), CurDAG);
-  if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
-    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
-    return;
-  }
-  Base = Addr;
-}
-
-// register+offset
-bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
-                                     SDValue &Base, SDValue &Offset) {
-  SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
-  return true;
-}
-
-// register+offset
-bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
-                                       SDValue &Base, SDValue &Offset) {
-  SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
+// Select a pair of operands which represent a valid PTX address. This could
+// be one of the following:
+//  - [var]        - Offset is simply set to 0
+//  - [reg]        - Offset is simply set to 0
+//  - [reg+immOff]
+//  - [var+immOff]
+// Note that immOff must fit into a 32-bit signed integer.
+bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base,
+                                   SDValue &Offset) {
+  Offset = accumulateOffset(Addr, SDLoc(Addr), CurDAG);
+  Base = selectBaseADDR(Addr, CurDAG);
   return true;
 }
 
@@ -2365,12 +2119,7 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
   default:
     return true;
   case InlineAsm::ConstraintCode::m: // memory
-    if (SelectDirectAddr(Op, Op0)) {
-      OutOps.push_back(Op0);
-      OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
-      return false;
-    }
-    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
+    if (SelectADDR(Op, Op0, Op1)) {
       OutOps.push_back(Op0);
       OutOps.push_back(Op1);
       return false;
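The selector rewrites above (tryLoad, tryLoadVector, tryLDGLDU, tryStore, tryStoreVector) all reduce to the same two-step contract: accumulateOffset folds any add-like chain of constants into one signed 32-bit immediate, and selectBaseADDR normalizes whatever remains into a register, frame index, or symbol. The standalone sketch below illustrates only the folding step; the Node type and the driver are simplified stand-ins for SDValue/SelectionDAG, not code from this patch.

    #include <cstdint>
    #include <iostream>
    #include <limits>

    // Toy stand-in for an address expression node (the real code walks SDValues).
    struct Node {
      bool IsAddLike = false; // ISD::ADD, or ISD::OR carrying the disjoint flag
      bool IsConst = false;   // node is a constant operand
      long long Imm = 0;
      Node *LHS = nullptr;
      Node *RHS = nullptr;
    };

    // Mirrors the contract of accumulateOffset: repeatedly peel a constant
    // right-hand term off an add-like chain while the running total still fits
    // a signed 32-bit immediate, leaving Addr pointing at the remaining base.
    long long accumulateOffset(Node *&Addr) {
      long long Total = 0;
      while (Addr->IsAddLike && Addr->RHS && Addr->RHS->IsConst) {
        long long Next = Total + Addr->RHS->Imm;
        if (Next > std::numeric_limits<int32_t>::max() ||
            Next < std::numeric_limits<int32_t>::min())
          break; // the offset must remain a valid i32 immediate
        Total = Next;
        Addr = Addr->LHS; // keep peeling toward the base
      }
      return Total;
    }

    int main() {
      Node Base;                       // stands in for a register or symbol
      Node C8;  C8.IsConst = true;  C8.Imm = 8;
      Node C16; C16.IsConst = true; C16.Imm = 16;
      Node Inner; Inner.IsAddLike = true; Inner.LHS = &Base;  Inner.RHS = &C8;
      Node Outer; Outer.IsAddLike = true; Outer.LHS = &Inner; Outer.RHS = &C16;

      Node *Addr = &Outer;
      long long Off = accumulateOffset(Addr);
      // Prints: offset=24 base-reached=1, i.e. (base + 8) + 16 selects [base+24].
      std::cout << "offset=" << Off << " base-reached=" << (Addr == &Base) << "\n";
    }

Because SelectADDR never fails, every caller can unconditionally append {Base, Offset, Chain} to the operand list, which is what removes the three-way addressing-mode dispatch from each selector.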
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 1d02ae333c86b..0a33001249e7e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -104,17 +104,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
     return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
   }
 
-  // Match direct address complex pattern.
-  bool SelectDirectAddr(SDValue N, SDValue &Address);
-
-  void SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
-                        SDValue &Offset, MVT VT);
-  bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base,
-                    SDValue &Offset);
-  bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base,
-                      SDValue &Offset);
-  bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base,
-                    SDValue &Offset);
+  bool SelectADDR(SDValue Addr, SDValue &Base, SDValue &Offset);
 
   bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 6a0f708021a16..36a0a06bdb8aa 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1917,27 +1917,15 @@ defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
 // Data Movement (Load / Store, Move)
 //-----------------------------------
 
-let WantsRoot = true in {
-  def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex]>;
-  def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex]>;
-}
-def ADDRvar : ComplexPattern<iPTR, 1, "SelectDirectAddr", []>;
+def addr : ComplexPattern<pAny, 2, "SelectADDR">;
 
-def MEMri : Operand<i32> {
-  let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops Int32Regs, i32imm);
-}
-def MEMri64 : Operand<i64> {
-  let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops Int64Regs, i64imm);
-}
-
-def imem : Operand<iPTR> {
+def ADDR_base : Operand<pAny> {
   let PrintMethod = "printOperand";
 }
 
-def imemAny : Operand<pAny> {
-  let PrintMethod = "printOperand";
+def ADDR : Operand<pAny> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops ADDR_base, i32imm);
 }
 
 def LdStCode : Operand<i32> {
@@ -1956,10 +1944,10 @@ def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
 def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
 
 // Load a memory address into a u32 or u64 register.
-def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
+def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR_base:$a),
                          "mov.u32 \t$dst, $a;",
                          [(set i32:$dst, (Wrapper tglobaladdr:$a))]>;
-def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
+def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR_base:$a),
                            "mov.u64 \t$dst, $a;",
                            [(set i64:$dst, (Wrapper tglobaladdr:$a))]>;
 
@@ -2021,12 +2009,17 @@ def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
 def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>;
 
 //---- Copy Frame Index ----
-def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
-                          "add.u32 \t$dst, ${addr:add};",
-                          [(set i32:$dst, ADDRri:$addr)]>;
-def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
-                            "add.u64 \t$dst, ${addr:add};",
-                            [(set i64:$dst, ADDRri64:$addr)]>;
+def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR:$addr),
+                          "add.u32 \t$dst, ${addr:add};", []>;
+def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR:$addr),
+                            "add.u64 \t$dst, ${addr:add};", []>;
+
+def to_tframeindex : SDNodeXForm<frameindex, [{
+  return CurDAG->getTargetFrameIndex(N->getIndex(), N->getValueType(0));
+}]>;
+
+def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>;
+def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>;
 
 //-----------------------------------
 // Comparison and Selection
@@ -2660,7 +2653,7 @@ def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
 def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
                                  [(LastCallArg (i32 1), (i32 imm:$a))]>;
 
-def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), "$addr, ",
+def CallVoidInst : NVPTXInst<(outs), (ins ADDR_base:$addr), "$addr, ",
                              [(CallVoid (Wrapper tglobaladdr:$addr))]>;
 def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ",
                                 [(CallVoid i32:$addr)]>;
@@ -2753,109 +2746,56 @@ foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
 //
 // Load / Store Handling
 //
-multiclass LD<NVPTXRegClass regclass> {
-  def _ari : NVPTXInst<
+class LD<NVPTXRegClass regclass>
+  : NVPTXInst<
     (outs regclass:$dst),
     (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
-         i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
-    "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t$dst, [$addr$offset];", []>;
-  def _ari_64 : NVPTXInst<
-    (outs regclass:$dst),
-    (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
-    "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t$dst, [$addr$offset];", []>;
-  def _asi : NVPTXInst<
-    (outs regclass:$dst),
-    (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
+         i32imm:$fromWidth, ADDR:$addr),
     "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t$dst, [$addr$offset];", []>;
-}
+    "\t$dst, [$addr];", []>;
 
 let mayLoad=1, hasSideEffects=0 in {
-  defm LD_i8 : LD<Int16Regs>;
-  defm LD_i16 : LD<Int16Regs>;
-  defm LD_i32 : LD<Int32Regs>;
-  defm LD_i64 : LD<Int64Regs>;
-  defm LD_f32 : LD<Float32Regs>;
-  defm LD_f64 : LD<Float64Regs>;
+  def LD_i8 : LD<Int16Regs>;
+  def LD_i16 : LD<Int16Regs>;
+  def LD_i32 : LD<Int32Regs>;
+  def LD_i64 : LD<Int64Regs>;
+  def LD_f32 : LD<Float32Regs>;
+  def LD_f64 : LD<Float64Regs>;
 }
 
-multiclass ST<NVPTXRegClass regclass> {
-  def _ari : NVPTXInst<
+class ST<NVPTXRegClass regclass>
+  : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
-         LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr,
-         Offseti32imm:$offset),
+         LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, ADDR:$addr),
     "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
-    " \t[$addr$offset], $src;", []>;
-  def _ari_64 : NVPTXInst<
-    (outs),
-    (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
-         LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr,
-         Offseti32imm:$offset),
-    "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
-    " \t[$addr$offset], $src;", []>;
-  def _asi : NVPTXInst<
-    (outs),
-    (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
-         LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr,
-         Offseti32imm:$offset),
-    "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
-    " \t[$addr$offset], $src;", []>;
-}
+    " \t[$addr], $src;", []>;
 
 let mayStore=1, hasSideEffects=0 in {
-  defm ST_i8 : ST<Int16Regs>;
-  defm ST_i16 : ST<Int16Regs>;
-  defm ST_i32 : ST<Int32Regs>;
-  defm ST_i64 : ST<Int64Regs>;
-  defm ST_f32 : ST<Float32Regs>;
-  defm ST_f64 : ST<Float64Regs>;
+  def ST_i8 : ST<Int16Regs>;
+  def ST_i16 : ST<Int16Regs>;
+  def ST_i32 : ST<Int32Regs>;
+  def ST_i64 : ST<Int64Regs>;
+  def ST_f32 : ST<Float32Regs>;
+  def ST_f64 : ST<Float64Regs>;
 }
 
 // The following is used only in and after vector elementizations. Vector
 // elementization happens at the machine instruction level, so the following
 // instructions never appear in the DAG.
 multiclass LD_VEC<NVPTXRegClass regclass> {
-  def _v2_ari : NVPTXInst<
-    (outs regclass:$dst1, regclass:$dst2),
-    (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
-    "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
-  def _v2_ari_64 : NVPTXInst<
+  def _v2 : NVPTXInst<
     (outs regclass:$dst1, regclass:$dst2),
     (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
+         LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
     "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
-  def _v2_asi : NVPTXInst<
-    (outs regclass:$dst1, regclass:$dst2),
-    (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
-    "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
-  def _v4_ari : NVPTXInst<
+    "\t{{$dst1, $dst2}}, [$addr];", []>;
+  def _v4 : NVPTXInst<
     (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
     (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
+         LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
    "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
-  def _v4_ari_64 : NVPTXInst<
-    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
-    (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
-    "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
-  def _v4_asi : NVPTXInst<
-    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
-    (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
-    "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
+    "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
 }
 
 let mayLoad=1, hasSideEffects=0 in {
   defm LDV_i8 : LD_VEC<Int16Regs>;
@@ -2867,48 +2807,20 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
-  def _v2_ari : NVPTXInst<
-    (outs),
-    (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
-         LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
-         Int32Regs:$addr, Offseti32imm:$offset),
-    "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t[$addr$offset], {{$src1, $src2}};", []>;
-  def _v2_ari_64 : NVPTXInst<
+  def _v2 : NVPTXInst<
     (outs),
     (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
         LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
-         Int64Regs:$addr, Offseti32imm:$offset),
+         ADDR:$addr),
     "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t[$addr$offset], {{$src1, $src2}};", []>;
-  def _v2_asi : NVPTXInst<
-    (outs),
-    (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
-         LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
-         imem:$addr, Offseti32imm:$offset),
-    "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t[$addr$offset], {{$src1, $src2}};", []>;
-  def _v4_ari : NVPTXInst<
+    "\t[$addr], {{$src1, $src2}};", []>;
+  def _v4 : NVPTXInst<
     (outs),
     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
          LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
+         LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
    "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
-  def _v4_ari_64 : NVPTXInst<
-    (outs),
-    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
-         LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
-    "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
-  def _v4_asi : NVPTXInst<
-    (outs),
-    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
-         LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
-    "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}"
-    "$fromWidth \t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
+    "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
 }
 
 let mayStore=1, hasSideEffects=0 in {
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 3373f9e90524f..d339afa39d891 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2717,65 +2717,46 @@ defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
 
 // Scalar
 
-multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
-  def asi: NVPTXInst<(outs regclass:$result), (ins imemAny:$src, Offseti32imm:$offset),
-                 "ldu.global." # TyStr # " \t$result, [$src$offset];",
-                 []>, Requires<[hasLDU]>;
-  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+class LDU_G<string TyStr, NVPTXRegClass regclass>
+  : NVPTXInst<(outs regclass:$result), (ins ADDR:$src),
              "ldu.global." # TyStr # " \t$result, [$src];",
              []>, Requires<[hasLDU]>;
-  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
-                 "ldu.global." # TyStr # " \t$result, [$src];",
-                 []>, Requires<[hasLDU]>;
-}
 
-defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8", Int16Regs>;
-defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16", Int16Regs>;
-defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32", Int32Regs>;
-defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64", Int64Regs>;
-defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32", Float32Regs>;
-defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64", Float64Regs>;
+def INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8", Int16Regs>;
+def INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16", Int16Regs>;
+def INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32", Int32Regs>;
+def INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64", Int64Regs>;
+def INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32", Float32Regs>;
+def INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64", Float64Regs>;
 
 // vector
 
 // Elementized vector ldu
-multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
-  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
-                        (ins MEMri:$src),
-               "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>;
-  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
-                        (ins MEMri64:$src),
+class VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass>
+  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+              (ins ADDR:$src),
               "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>;
-  def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
-                      (ins imemAny:$src, Offseti32imm:$offset),
-               "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src$offset];", []>;
-}
-multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
-  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
-                         regclass:$dst4), (ins MEMri:$src),
-               "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
-  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
-                         regclass:$dst4), (ins MEMri64:$src),
+
+class VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass>
+  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+               regclass:$dst4), (ins ADDR:$src),
               "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
-  def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
-                       regclass:$dst4), (ins imemAny:$src, Offseti32imm:$offset),
-               "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src$offset];", []>;
-}
 
-defm INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<"u8", Int16Regs>;
-defm INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"u16", Int16Regs>;
-defm INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"u32", Int32Regs>;
-defm INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"f32", Float32Regs>;
-defm INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<"u64", Int64Regs>;
-defm INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<"f64", Float64Regs>;
-defm INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<"u8", Int16Regs>;
-defm INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<"u16", Int16Regs>;
-defm INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<"u32", Int32Regs>;
-defm INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>;
-defm INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>;
-defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"f32", Float32Regs>;
+def INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<"u8", Int16Regs>;
+def INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"u16", Int16Regs>;
+def INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"u32", Int32Regs>;
+def INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"f32", Float32Regs>;
+def INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<"u64", Int64Regs>;
+def INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<"f64", Float64Regs>;
+
+def INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<"u8", Int16Regs>;
+def INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<"u16", Int16Regs>;
+def INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<"u32", Int32Regs>;
+def INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>;
+def INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>;
+def INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"f32", Float32Regs>;
 
 //-----------------------------------
 
@@ -2786,64 +2767,44 @@ defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"f32", Float32Regs>;
 // non-coherent texture cache, and therefore the values read must be read-only
 // during the lifetime of the kernel.
 
-multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
-  def asi: NVPTXInst<(outs regclass:$result), (ins imemAny:$src, Offseti32imm:$offset),
-                 "ld.global.nc." # TyStr # " \t$result, [$src$offset];",
-                 []>, Requires<[hasLDG]>;
-  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
-                 "ld.global.nc." # TyStr # " \t$result, [$src];",
-                 []>, Requires<[hasLDG]>;
-  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+class LDG_G<string TyStr, NVPTXRegClass regclass>
+  : NVPTXInst<(outs regclass:$result), (ins ADDR:$src),
              "ld.global.nc." # TyStr # " \t$result, [$src];",
              []>, Requires<[hasLDG]>;
-}
 
-defm INT_PTX_LDG_GLOBAL_i8 : LDG_G<"u8", Int16Regs>;
-defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16", Int16Regs>;
-defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32", Int32Regs>;
-defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64", Int64Regs>;
-defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32", Float32Regs>;
-defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64", Float64Regs>;
+def INT_PTX_LDG_GLOBAL_i8 : LDG_G<"u8", Int16Regs>;
+def INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16", Int16Regs>;
+def INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32", Int32Regs>;
+def INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64", Int64Regs>;
+def INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32", Float32Regs>;
+def INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64", Float64Regs>;
 
 // vector
 
 // Elementized vector ldg
-multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
-  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
-                        (ins MEMri:$src),
-               "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>;
-  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
-                        (ins MEMri64:$src),
-               "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>;
-  def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
-                      (ins imemAny:$src, Offseti32imm:$offset),
-               "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src$offset];", []>;
-}
-
-multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
-  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
-                         regclass:$dst4), (ins MEMri:$src),
-               "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
-  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
-                         regclass:$dst4), (ins MEMri64:$src),
-               "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
-  def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
-                       regclass:$dst4), (ins imemAny:$src, Offseti32imm:$offset),
-               "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src$offset];", []>;
-}
+class VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> :
+  NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+            (ins ADDR:$src),
+            "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>;
+
+
+class VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> :
+  NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
+            (ins ADDR:$src),
+            "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
 
 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
-defm INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<"u8", Int16Regs>;
-defm INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<"u16", Int16Regs>;
-defm INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"u32", Int32Regs>;
-defm INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"f32", Float32Regs>;
-defm INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<"u64", Int64Regs>;
-defm INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<"f64", Float64Regs>;
+def INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<"u8", Int16Regs>;
+def INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<"u16", Int16Regs>;
+def INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"u32", Int32Regs>;
+def INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"f32", Float32Regs>;
+def INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<"u64", Int64Regs>;
+def INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<"f64", Float64Regs>;
 
-defm INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<"u8", Int16Regs>;
-defm INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<"u16", Int16Regs>;
-defm INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"u32", Int32Regs>;
-defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"f32", Float32Regs>;
+def INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<"u8", Int16Regs>;
+def INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<"u16", Int16Regs>;
+def INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"u32", Int32Regs>;
+def INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"f32", Float32Regs>;
 
 multiclass NG_TO_G<string Str> {
 
@@ -2918,17 +2879,17 @@ def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
 
 // @TODO: Are these actually needed, or will we always just see symbols
 // copied to registers first?
-/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
+/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins ADDR_base:$s),
                              "mov.u32 \t$r, $s;",
                              [(set Int32Regs:$r,
                                (int_nvvm_move_ptr texternalsym:$s))]>;
-def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
+def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins ADDR_base:$s),
                              "mov.u64 \t$r, $s;",
                              [(set Int64Regs:$r,
                                (int_nvvm_move_ptr texternalsym:$s))]>;*/
 
 def texsurf_handles
-    : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
+    : NVPTXInst<(outs Int64Regs:$result), (ins ADDR_base:$src),
                 "mov.u64 \t$result, $src;", []>;
 
 //-----------------------------------
@@ -7212,20 +7173,16 @@ class WMMA_REGINFO<WMMA_FRAG frag, string op>
 class BuildPatternI<Intrinsic Intr, dag Ins> {
   // Build a dag pattern that matches the intrinsic call.
   dag ret = !foreach(tmp, Ins,
-                     !subst(imem, ADDRvar,
-                     !subst(MEMri64, ADDRri64,
-                     !subst(MEMri, ADDRri,
-                     !subst(ins, Intr, tmp)))));
+                     !subst(ADDR, addr,
+                     !subst(ins, Intr, tmp)));
 }
 
 // Same as above, but uses PatFrag instead of an Intrinsic.
 class BuildPatternPF<PatFrag Intr, dag Ins> {
   // Build a dag pattern that matches the intrinsic call.
   dag ret = !foreach(tmp, Ins,
-                     !subst(imem, ADDRvar,
-                     !subst(MEMri64, ADDRri64,
-                     !subst(MEMri, ADDRri,
-                     !subst(ins, Intr, tmp)))));
+                     !subst(ADDR, addr,
+                     !subst(ins, Intr, tmp)));
 }
 
 // Common WMMA-related fields used for building patterns for all MMA instructions.
@@ -7242,10 +7199,9 @@ class WMMA_INSTR<string _Intr, list<dag> _Args>
 
 // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
 //
-class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
-                DAGOperand SrcOp>
+class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride>
   : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
-              [!con((ins SrcOp:$src),
+              [!con((ins ADDR:$src),
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
     Requires<Frag.Preds> {
   // Load/store intrinsics are overloaded on pointer's address space.
@@ -7282,9 +7238,9 @@ class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
 class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
-                   bit WithStride, DAGOperand DstOp>
+                   bit WithStride>
   : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
-              [!con((ins DstOp:$dst),
+              [!con((ins ADDR:$dst),
                     Frag.Ins,
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
     Requires<Frag.Preds> {
@@ -7323,14 +7279,12 @@ defset list<WMMA_INSTR> MMA_LDSTs = {
   foreach layout = ["row", "col"] in {
     foreach stride = [false, true] in {
       foreach space = [".global", ".shared", ""] in {
-        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
-          foreach frag = NVVM_MMA_OPS.all_ld_ops in
-            if NVVM_WMMA_LDST_SUPPORTED<frag, layout, space, stride>.ret then
-              def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
-          foreach frag = NVVM_MMA_OPS.all_st_ops in
-            if NVVM_WMMA_LDST_SUPPORTED<frag, layout, space, stride>.ret then
-              def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
-        } // addr
+        foreach frag = NVVM_MMA_OPS.all_ld_ops in
+          if NVVM_WMMA_LDST_SUPPORTED<frag, layout, space, stride>.ret then
+            def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride>;
+        foreach frag = NVVM_MMA_OPS.all_st_ops in
+          if NVVM_WMMA_LDST_SUPPORTED<frag, layout, space, stride>.ret then
+            def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride>;
       } // space
     } // stride
   } // layout
@@ -7457,9 +7411,8 @@ defset list<WMMA_INSTR> MMAs = {
 //
 // ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
 //
-class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
-               DAGOperand SrcOp>
-  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
+class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space>
+  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins ADDR:$src)]>,
     Requires<Frag.Preds> {
   // Build PatFrag that only matches particular address space.
   PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
@@ -7483,12 +7436,9 @@ class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
 defset list<WMMA_INSTR> LDMATRIXs = {
   foreach transposed = [false, true] in {
     foreach space = [".shared", ""] in {
-      foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
-        foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
-          if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
-            def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
-                           addr>;
-      } // addr
+      foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
+        if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
+          def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space>;
     } // space
   } // transposed
 } // defset
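With the _asi/_ari/_ari32/_ari64 LDG/LDU and WMMA address variants gone, opcode choice in the selector depends only on the element type. The sketch below mirrors the shape of the pickOpcodeForVT helper the selectors rely on; the signature here is simplified for illustration (the real helper takes MVT::SimpleValueType and lives, unchanged by this patch, in NVPTXISelDAGToDAG.cpp):

    #include <iostream>
    #include <optional>

    enum class EltTy { I8, I16, I32, I64, F32, F64, Other };

    // Simplified mirror of pickOpcodeForVT: one opcode candidate per element
    // type, with std::nullopt for combinations that have no instruction (for
    // example, there are no v4 loads of 64-bit elements).
    std::optional<unsigned> pickOpcodeForVT(EltTy Ty, unsigned I8, unsigned I16,
                                            unsigned I32,
                                            std::optional<unsigned> I64,
                                            unsigned F32,
                                            std::optional<unsigned> F64) {
      switch (Ty) {
      case EltTy::I8:  return I8;
      case EltTy::I16: return I16;
      case EltTy::I32: return I32;
      case EltTy::I64: return I64;
      case EltTy::F32: return F32;
      case EltTy::F64: return F64;
      default:         return std::nullopt;
      }
    }

    int main() {
      // A v4 load of i64 has no opcode, so selection bails out cleanly.
      auto Op = pickOpcodeForVT(EltTy::I64, 1, 2, 3, std::nullopt, 4, std::nullopt);
      std::cout << (Op ? "selected" : "no opcode, bail out") << "\n";
    }

Each table shrinks from up to three entries per element type (one per addressing mode) to exactly one, which is what makes the single unified switch in tryLDGLDU possible.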
diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index 4971d31691c54..46e4a905aa09a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -1800,7 +1800,7 @@ bool NVPTXReplaceImageHandles::replaceImageHandle(MachineOperand &Op,
   MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg());
 
   switch (TexHandleDef.getOpcode()) {
-  case NVPTX::LD_i64_asi: {
+  case NVPTX::LD_i64: {
     // The handle is a parameter value being loaded, replace with the
     // parameter symbol
     const auto &TM = static_cast<const NVPTXTargetMachine &>(MF.getTarget());
diff --git a/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir b/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
index 62ede3b9eef3b..400bff47c8f2e 100644
--- a/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
+++ b/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
@@ -16,7 +16,7 @@ registers:
   - { id: 1, class: float32regs }
 body: |
   bb.0.entry:
-    %0 = LD_f32_asi 0, 4, 1, 2, 32, &test_param_0, 0
+    %0 = LD_f32 0, 4, 1, 2, 32, &test_param_0, 0
     ; CHECK: [[@LINE+1]]:33: expected a floating point literal
     %1 = FADD_rnf32ri %0, float 3
     StoreRetvalF32 %1, 0
diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
index 69c1e25a06024..486c6ca16a531 100644
--- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
+++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
@@ -40,9 +40,9 @@ registers:
   - { id: 7, class: float32regs }
 body: |
   bb.0.entry:
-    %0 = LD_f32_asi 0, 0, 4, 1, 2, 32, &test_param_0, 0
+    %0 = LD_f32 0, 0, 4, 1, 2, 32, &test_param_0, 0
     %1 = CVT_f64_f32 %0, 0
-    %2 = LD_i32_asi 0, 0, 4, 1, 0, 32, &test_param_1, 0
+    %2 = LD_i32 0, 0, 4, 1, 0, 32, &test_param_1, 0
     ; CHECK: %3:float64regs = FADD_rnf64ri %1, double 3.250000e+00
     %3 = FADD_rnf64ri %1, double 3.250000e+00
     %4 = CVT_f32_f64 %3, 5
@@ -66,9 +66,9 @@ registers:
   - { id: 7, class: float32regs }
 body: |
   bb.0.entry:
-    %0 = LD_f32_asi 0, 0, 4, 1, 2, 32, &test2_param_0, 0
+    %0 = LD_f32 0, 0, 4, 1, 2, 32, &test2_param_0, 0
     %1 = CVT_f64_f32 %0, 0
-    %2 = LD_i32_asi 0, 0, 4, 1, 0, 32, &test2_param_1, 0
+    %2 = LD_i32 0, 0, 4, 1, 0, 32, &test2_param_1, 0
     ; CHECK: %3:float64regs = FADD_rnf64ri %1, double 0x7FF8000000000000
     %3 = FADD_rnf64ri %1, double 0x7FF8000000000000
     %4 = CVT_f32_f64 %3, 5
diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
index cc9a36509db33..114b0f9702033 100644
--- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
+++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
@@ -16,7 +16,7 @@ registers:
   - { id: 1, class: float32regs }
 body: |
   bb.0.entry:
-    %0 = LD_f32_asi 0, 4, 1, 2, 32, &test_param_0, 0
+    %0 = LD_f32 0, 4, 1, 2, 32, &test_param_0, 0
     ; CHECK: [[@LINE+1]]:33: floating point constant does not have type 'float'
     %1 = FADD_rnf32ri %0, float 0xH3C00
     StoreRetvalF32 %1, 0
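For reading the updated MIR tests above: the flag immediates come first, then the unified (base, offset) address pair. The decoder below is an assumed illustration only; the field order follows the LdStCode operands in the .td (ordering, scope, address space, vector arity, sign, width), and the concrete numeric encodings are backend-internal and subject to change.

    #include <iostream>

    // Assumed decoding of a line such as:
    //   %0 = LD_f32 0, 0, 4, 1, 2, 32, &test_param_0, 0
    // i.e. six LdStCode immediates, then the ADDR pair (base symbol/register
    // plus i32 offset); the chain operand is implicit in MIR printing.
    struct LoadOperands {
      int Ordering, Scope, AddrSpace, VecType, Sign, Width;
      const char *Base;
      int Offset;
    };

    int main() {
      LoadOperands Op{0, 0, 4, 1, 2, 32, "&test_param_0", 0};
      std::cout << "width=" << Op.Width << " bits, addrspace-code=" << Op.AddrSpace
                << ", addr=[" << Op.Base << "+" << Op.Offset << "]\n";
    }

The only textual change the tests need is dropping the _asi suffix: the operand list, including the trailing symbol-plus-offset pair, is already in the unified form.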