-
Notifications
You must be signed in to change notification settings - Fork 13.4k
Revert "[NVPTX] Add Volta Load/Store Atomics (.relaxed, .acquire, .release) and Volatile (.mmio/.volatile) support" #99695
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Revert "[NVPTX] Add Volta Load/Store Atomics (.relaxed, .acquire, .release) and Volatile (.mmio/.volatile) support" — This reverts commit 2da30f8.
@llvm/pr-subscribers-backend-nvptx Author: Jorge Gorbe Moya (slackito) ChangesReverts llvm/llvm-project#98022 Patch is 108.79 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/99695.diff 7 Files Affected:
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index a004d64c21cc6..380d878c1f532 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -227,33 +227,9 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
if (Modifier) {
const MCOperand &MO = MI->getOperand(OpNum);
int Imm = (int) MO.getImm();
- if (!strcmp(Modifier, "sem")) {
- switch (Imm) {
- case NVPTX::PTXLdStInstCode::NotAtomic:
- break;
- case NVPTX::PTXLdStInstCode::Volatile:
+ if (!strcmp(Modifier, "volatile")) {
+ if (Imm)
O << ".volatile";
- break;
- case NVPTX::PTXLdStInstCode::Relaxed:
- O << ".relaxed.sys";
- break;
- case NVPTX::PTXLdStInstCode::Acquire:
- O << ".acquire.sys";
- break;
- case NVPTX::PTXLdStInstCode::Release:
- O << ".release.sys";
- break;
- case NVPTX::PTXLdStInstCode::RelaxedMMIO:
- O << ".mmio.relaxed.sys";
- break;
- default:
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "NVPTX LdStCode Printer does not support \"" << Imm
- << "\" sem modifier.";
- report_fatal_error(OS.str());
- break;
- }
} else if (!strcmp(Modifier, "addsp")) {
switch (Imm) {
case NVPTX::PTXLdStInstCode::GLOBAL:
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 3c7167b157025..b0cb24c63c3ce 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -107,14 +107,6 @@ enum LoadStore {
};
namespace PTXLdStInstCode {
-enum MemorySemantic {
- NotAtomic = 0, // PTX calls these: "Weak"
- Volatile = 1,
- Relaxed = 2,
- Acquire = 3,
- Release = 4,
- RelaxedMMIO = 5
-};
enum AddressSpace {
GENERIC = 0,
GLOBAL = 1,
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 371ec8596ef63..11193c11ede3b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -714,170 +714,6 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
return NVPTX::PTXLdStInstCode::GENERIC;
}
-static unsigned int getCodeMemorySemantic(MemSDNode *N,
- const NVPTXSubtarget *Subtarget) {
- AtomicOrdering Ordering = N->getSuccessOrdering();
- auto CodeAddrSpace = getCodeAddrSpace(N);
-
- bool HasMemoryOrdering = Subtarget->hasMemoryOrdering();
- bool HasRelaxedMMIO = Subtarget->hasRelaxedMMIO();
-
- // TODO: lowering for SequentiallyConsistent Operations: for now, we error.
- // TODO: lowering for AcquireRelease Operations: for now, we error.
- //
-
- // clang-format off
-
- // Lowering for non-SequentiallyConsistent Operations
- //
- // | Atomic | Volatile | Statespace | PTX sm_60- | PTX sm_70+ |
- // |---------|----------|--------------------|------------|------------------------------|
- // | No | No | All | plain | .weak |
- // | No | Yes | Generic,Shared, | .volatile | .volatile |
- // | | | Global [0] | | |
- // | No | Yes | Local,Const,Param | plain [1] | .weak [1] |
- // | Relaxed | No | Generic,Shared, | | |
- // | | | Global [0] | .volatile | <atomic sem> |
- // | Other | No | Generic,Shared, | Error [2] | <atomic sem> |
- // | | | Global [0] | | |
- // | Yes | No | Local,Const,Param | plain [1] | .weak [1] |
- // | Relaxed | Yes | Generic,Shared [0] | .volatile | .volatile |
- // | Relaxed | Yes | Global [0] | .volatile | .mmio.relaxed.sys (PTX 8.2+) |
- // | | | | | or .volatile (PTX 8.1-) |
- // | Relaxed | Yes | Local,Const,Param | plain [1] | .weak [1] |
- // | Other | Yes | Generic, Shared, | Error [2] | <atomic sem> [3] |
- // | | | / Global [0] | | |
-
- // clang-format on
-
- // [0]: volatile and atomics are only supported on global or shared
- // memory locations, accessed via generic/shared/global pointers.
- // MMIO is only supported on global memory locations,
- // accessed via generic/global pointers.
- // TODO: Implement MMIO access via generic pointer to global.
- // Currently implemented for global pointers only.
-
- // [1]: Lowering volatile/atomic operations to non-volatile/non-atomic
- // PTX instructions fails to preserve their C++ side-effects.
- //
- // Example (https://github.com/llvm/llvm-project/issues/62057):
- //
- // void example() {
- // std::atomic<bool> True = true;
- // while (True.load(std::memory_order_relaxed));
- // }
- //
- // A C++ program that calls "example" is well-defined: the infinite loop
- // performs an atomic operation. By lowering volatile/atomics to
- // "weak" memory operations, we are transforming the above into:
- //
- // void undefined_behavior() {
- // bool True = true;
- // while (True);
- // }
- //
- // which exhibits undefined behavior in both C++ and PTX.
- //
- // Calling "example" in CUDA C++ compiled for sm_60- exhibits undefined
- // behavior due to lack of Independent Forward Progress. Lowering these
- // to weak memory operations in sm_60- is therefore fine.
- //
- // TODO: lower atomic and volatile operations to memory locations
- // in local, const, and param to two PTX instructions in sm_70+:
- // - the "weak" memory instruction we are currently lowering to, and
- // - some other instruction that preserves the side-effect, e.g.,
- // a dead dummy volatile load.
-
- if (CodeAddrSpace == NVPTX::PTXLdStInstCode::LOCAL ||
- CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT ||
- CodeAddrSpace == NVPTX::PTXLdStInstCode::PARAM) {
- return NVPTX::PTXLdStInstCode::NotAtomic;
- }
-
- // [2]: Atomics with Ordering different than Relaxed are not supported on
- // sm_60 and older; this includes volatile atomics.
- if (!(Ordering == AtomicOrdering::NotAtomic ||
- Ordering == AtomicOrdering::Monotonic) &&
- !HasMemoryOrdering) {
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "PTX does not support \"atomic\" for orderings different than"
- "\"NotAtomic\" or \"Monotonic\" for sm_60 or older, but order is: \""
- << toIRString(Ordering) << "\".";
- report_fatal_error(OS.str());
- }
-
- // [3]: TODO: these should eventually use .mmio<.atomic sem>; for now we drop
- // the volatile semantics and preserve the atomic ones.
-
- // PTX volatile and PTX atomics are not available for statespace that differ
- // from .generic, .global, or .shared. The behavior of PTX volatile and PTX
- // atomics is undefined if the generic address does not refer to a .global or
- // .shared memory location.
- bool AddrGenericOrGlobalOrShared =
- (CodeAddrSpace == NVPTX::PTXLdStInstCode::GENERIC ||
- CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL ||
- CodeAddrSpace == NVPTX::PTXLdStInstCode::SHARED);
- bool UseRelaxedMMIO =
- HasRelaxedMMIO && CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL;
-
- switch (Ordering) {
- case AtomicOrdering::NotAtomic:
- return N->isVolatile() && AddrGenericOrGlobalOrShared
- ? NVPTX::PTXLdStInstCode::Volatile
- : NVPTX::PTXLdStInstCode::NotAtomic;
- case AtomicOrdering::Monotonic:
- if (N->isVolatile())
- return UseRelaxedMMIO ? NVPTX::PTXLdStInstCode::RelaxedMMIO
- : AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Volatile
- : NVPTX::PTXLdStInstCode::NotAtomic;
- else
- return HasMemoryOrdering ? NVPTX::PTXLdStInstCode::Relaxed
- : AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Volatile
- : NVPTX::PTXLdStInstCode::NotAtomic;
- case AtomicOrdering::Acquire:
- if (!N->readMem()) {
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "PTX only supports Acquire Ordering on reads: "
- << N->getOperationName();
- N->print(OS);
- report_fatal_error(OS.str());
- }
- return AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Acquire
- : NVPTX::PTXLdStInstCode::NotAtomic;
- case AtomicOrdering::Release:
- if (!N->writeMem()) {
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "PTX only supports Release Ordering on writes: "
- << N->getOperationName();
- N->print(OS);
- report_fatal_error(OS.str());
- }
- return AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Release
- : NVPTX::PTXLdStInstCode::NotAtomic;
- case AtomicOrdering::AcquireRelease: {
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "PTX only supports AcquireRelease Ordering on read-modify-write: "
- << N->getOperationName();
- N->print(OS);
- report_fatal_error(OS.str());
- }
- case AtomicOrdering::SequentiallyConsistent:
- case AtomicOrdering::Unordered:
- // TODO: support AcquireRelease and SequentiallyConsistent
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "NVPTX backend does not support AtomicOrdering \""
- << toIRString(Ordering) << "\" yet.";
- report_fatal_error(OS.str());
- }
-
- llvm_unreachable("unexpected unhandled case");
-}
-
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
unsigned CodeAddrSpace, MachineFunction *F) {
// We use ldg (i.e. ld.global.nc) for invariant loads from the global address
@@ -1080,18 +916,32 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
if (!LoadedVT.isSimple())
return false;
+ AtomicOrdering Ordering = LD->getSuccessOrdering();
+ // In order to lower atomic loads with stronger guarantees we would need to
+ // use load.acquire or insert fences. However these features were only added
+ // with PTX ISA 6.0 / sm_70.
+ // TODO: Check if we can actually use the new instructions and implement them.
+ if (isStrongerThanMonotonic(Ordering))
+ return false;
+
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
return tryLDGLDU(N);
}
- // Memory Semantic Setting
- unsigned int CodeMemorySem = getCodeMemorySemantic(LD, Subtarget);
-
unsigned int PointerSize =
CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
+ // Volatile Setting
+ // - .volatile is only available for .global and .shared
+ // - .volatile has the same memory synchronization semantics as .relaxed.sys
+ bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ isVolatile = false;
+
// Type Setting: fromType + fromTypeWidth
//
// Sign : ISD::SEXTLOAD
@@ -1132,13 +982,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
- getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl),
- getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl),
- Addr,
- Chain};
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), Addr, Chain };
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
: SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
@@ -1147,14 +993,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
- getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl),
- getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl),
- Base,
- Offset,
- Chain};
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
: SelectADDRri(N1.getNode(), N1, Base, Offset)) {
@@ -1169,14 +1010,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
- getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl),
- getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl),
- Base,
- Offset,
- Chain};
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else {
if (PointerSize == 64)
@@ -1190,13 +1026,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
- getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl),
- getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl),
- N1,
- Chain};
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), N1, Chain };
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
}
@@ -1233,8 +1065,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
unsigned int PointerSize =
CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
- // Memory Semantic Setting
- unsigned int CodeMemorySem = getCodeMemorySemantic(MemSD, Subtarget);
+ // Volatile Setting
+ // - .volatile is only availalble for .global and .shared
+ bool IsVolatile = MemSD->isVolatile();
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ IsVolatile = false;
// Vector Setting
MVT SimpleVT = LoadedVT.getSimpleVT();
@@ -1301,13 +1138,9 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
- getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL),
- getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL),
- Addr,
- Chain};
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Addr, Chain };
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else if (PointerSize == 64
? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
@@ -1330,14 +1163,9 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
- getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL),
- getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL),
- Base,
- Offset,
- Chain};
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else if (PointerSize == 64
? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
@@ -1380,14 +1208,9 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
- getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL),
- getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL),
- Base,
- Offset,
- Chain};
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else {
@@ -1430,13 +1253,9 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
- getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL),
- getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL),
- Op1,
- Chain};
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Op1, Chain };
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
}
@@ -1879,13 +1698,27 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
if (!StoreVT.isSimple())
return false;
+ AtomicOrdering Ordering = ST->getSuccessOrdering();
+ // In order to lower atomic loads with stronger guarantees we would need to
+ // use store.release or insert fences. However these features were only added
+ // with PTX ISA 6.0 / sm_70.
+ // TODO: Check if we can actually use the new instructions and implement them.
+ if (isStrongerThanMonotonic(Ordering))
+ return false;
+
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
unsigned int PointerSize =
CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());
- // Memory Semantic Setting
- unsigned int CodeMemorySem = getCodeMemorySemantic(ST, Subtarget);
+ // Volatile Setting
+ // - .volatile is only available for .global and .shared
+ // - .volatile has the same memory synchronization semantics as .relaxed.sys
+ bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC...
[truncated]
|
You can test this locally with the following command:git-clang-format --diff 9b02b75c4d035192d9f24038f70107d7ffbb0f77 91f99bdee022efd25779f300cf2ae548ccfe2092 --extensions cpp,h -- llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp llvm/lib/Target/NVPTX/NVPTX.h llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp llvm/lib/Target/NVPTX/NVPTXSubtarget.h View the diff from clang-format here.diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index b0cb24c63c..932e15204f 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -126,7 +126,7 @@ enum VecType {
V2 = 2,
V4 = 4
};
-}
+} // namespace PTXLdStInstCode
/// PTXCvtMode - Conversion code enumeration
namespace PTXCvtMode {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 11193c11ed..c9d76de68c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -982,9 +982,13 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), Addr, Chain };
+ SDValue Ops[] = {getI32Imm(isVolatile, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ Addr,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
: SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
@@ -993,9 +997,14 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
+ SDValue Ops[] = {getI32Imm(isVolatile, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ Base,
+ Offset,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
: SelectADDRri(N1.getNode(), N1, Base, Offset)) {
@@ -1010,9 +1019,14 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
+ SDValue Ops[] = {getI32Imm(isVolatile, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ Base,
+ Offset,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else {
if (PointerSize == 64)
@@ -1026,9 +1040,13 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), N1, Chain };
+ SDValue Ops[] = {getI32Imm(isVolatile, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ N1,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
}
@@ -1138,9 +1156,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL), Addr, Chain };
+ SDValue Ops[] = {getI32Imm(IsVolatile, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL),
+ Addr,
+ Chain};
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else if (PointerSize == 64
? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
@@ -1163,9 +1185,14 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
+ SDValue Ops[] = {getI32Imm(IsVolatile, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL),
+ Base,
+ Offset,
+ Chain};
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else if (PointerSize == 64
? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
@@ -1208,9 +1235,14 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
+ SDValue Ops[] = {getI32Imm(IsVolatile, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL),
+ Base,
+ Offset,
+ Chain};
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else {
@@ -1253,9 +1285,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL), Op1, Chain };
+ SDValue Ops[] = {getI32Imm(IsVolatile, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL),
+ Op1,
+ Chain};
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
}
|
I'm going to ignore the clang-format check, to keep this a straight revert of the original patch. |
Looks like you may need to set skip-precommit-approval label. |
Please always provide the revert reason in the commit message so it shows up in the history log. |
Will do, sorry about that. And thank you for pointing it out. |
The problem is that XLA is using [an internal NVPTX symbol removed by the reverted patch — the original inline code reference was lost in extraction]. This looks like a mistake on XLA's side, which should not be depending on it directly. #99709 updates this patch to provide [the missing symbol] so XLA keeps building.
Reverts #98022