-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[NVPTX] Add Volta Atomic SequentiallyConsistent Load and Store Operations #98551
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-nvptx Author: None (gonzalobg) ChangesThis PR Builds on #98022 . Patch is 119.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/98551.diff 7 Files Affected:
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index b7a20c351f5ff..9f1e56cd84e51 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -224,9 +224,47 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
if (Modifier) {
const MCOperand &MO = MI->getOperand(OpNum);
int Imm = (int) MO.getImm();
- if (!strcmp(Modifier, "volatile")) {
- if (Imm)
+ if (!strcmp(Modifier, "sem")) {
+ auto Ordering =
+ NVPTX::Ordering(static_cast<NVPTX::OrderingUnderlyingType>(Imm));
+ switch (Ordering) {
+ case NVPTX::Ordering::NotAtomic:
+ break;
+ case NVPTX::Ordering::Volatile:
O << ".volatile";
+ break;
+ case NVPTX::Ordering::Relaxed:
+ O << ".relaxed.sys";
+ break;
+ case NVPTX::Ordering::Acquire:
+ O << ".acquire.sys";
+ break;
+ case NVPTX::Ordering::Release:
+ O << ".release.sys";
+ break;
+ case NVPTX::Ordering::RelaxedMMIO:
+ O << ".mmio.relaxed.sys";
+ break;
+ default:
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "NVPTX LdStCode Printer does not support \"" << Ordering
+ << "\" sem modifier.";
+ report_fatal_error(OS.str());
+ break;
+ }
+ } else if (!strcmp(Modifier, "sc")) {
+ auto Ordering =
+ NVPTX::Ordering(static_cast<NVPTX::OrderingUnderlyingType>(Imm));
+ switch (Ordering) {
+ // TODO: refactor fence insertion in ISelDagToDag instead of here
+ // as part of implementing atomicrmw seq_cst.
+ case NVPTX::Ordering::SequentiallyConsistent:
+ O << "fence.sc.sys;\n\t";
+ break;
+ default:
+ break;
+ }
} else if (!strcmp(Modifier, "addsp")) {
switch (Imm) {
case NVPTX::PTXLdStInstCode::GLOBAL:
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index b0cb24c63c3ce..c9cce23788ca4 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -106,6 +106,52 @@ enum LoadStore {
isStoreShift = 6
};
+// Extends LLVM AtomicOrdering with PTX Orderings:
+using OrderingUnderlyingType = unsigned int;
+enum class Ordering : OrderingUnderlyingType {
+ NotAtomic = 0, // PTX calls these: "Weak"
+ // Unordered = 1, // TODO: NVPTX should map this to "Relaxed"
+ Relaxed = 2,
+ // Consume = 3, // Unimplemented in LLVM; NVPTX would map to "Acquire"
+ Acquire = 4,
+ Release = 5,
+ // AcquireRelease = 6, // TODO
+ SequentiallyConsistent = 7,
+ Volatile = 8,
+ RelaxedMMIO = 9,
+ LAST = RelaxedMMIO
+};
+
+template <typename OStream> OStream &operator<<(OStream &O, Ordering Order) {
+ switch (Order) {
+ case Ordering::NotAtomic:
+ O << "NotAtomic";
+ return O;
+ case Ordering::Relaxed:
+ O << "Relaxed";
+ return O;
+ case Ordering::Acquire:
+ O << "Acquire";
+ return O;
+ case Ordering::Release:
+ O << "Release";
+ return O;
+ // case Ordering::AcquireRelease:
+ // O << "AcquireRelease";
+ // return O;
+ case Ordering::SequentiallyConsistent:
+ O << "SequentiallyConsistent";
+ return O;
+ case Ordering::Volatile:
+ O << "Volatile";
+ return O;
+ case Ordering::RelaxedMMIO:
+ O << "RelaxedMMIO";
+ return O;
+ }
+ report_fatal_error("unknown ordering");
+}
+
namespace PTXLdStInstCode {
enum AddressSpace {
GENERIC = 0,
@@ -126,7 +172,7 @@ enum VecType {
V2 = 2,
V4 = 4
};
-}
+} // namespace PTXLdStInstCode
/// PTXCvtMode - Conversion code enumeration
namespace PTXCvtMode {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 1e1cbb15e33d4..41a3c2102427f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -700,6 +700,183 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
return NVPTX::PTXLdStInstCode::GENERIC;
}
+struct OperationOrderings {
+ NVPTX::OrderingUnderlyingType InstrOrdering;
+ NVPTX::OrderingUnderlyingType FenceOrdering;
+ OperationOrderings(NVPTX::Ordering o = NVPTX::Ordering::NotAtomic,
+ NVPTX::Ordering f = NVPTX::Ordering::NotAtomic)
+ : InstrOrdering(static_cast<NVPTX::OrderingUnderlyingType>(o)),
+ FenceOrdering(static_cast<NVPTX::OrderingUnderlyingType>(f)) {}
+};
+
+static OperationOrderings
+getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
+ AtomicOrdering Ordering = N->getSuccessOrdering();
+ auto CodeAddrSpace = getCodeAddrSpace(N);
+
+ bool HasMemoryOrdering = Subtarget->hasMemoryOrdering();
+ bool HasRelaxedMMIO = Subtarget->hasRelaxedMMIO();
+
+ // clang-format off
+
+ // Lowering for Load/Store Operations (note: AcquireRelease Loads or Stores error).
+ //
+ // | Atomic | Volatile | Statespace | Lowering sm_60- | Lowering sm_70+ |
+ // |---------|----------|-------------------------------|-----------------|------------------------------------------------------|
+ // | No | No | All | plain | .weak |
+ // | No | Yes | Generic / Shared / Global [0] | .volatile | .volatile |
+ // | No | Yes | Local / Const / Param | plain [1] | .weak [1] |
+ // | Relaxed | No | Generic / Shared / Global [0] | .volatile | <atomic sem> |
+ // | Other | No | Generic / Shared / Global [0] | Error [2] | <atomic sem> |
+ // | Yes | No | Local / Const / Param | plain [1] | .weak [1] |
+ // | Relaxed | Yes | Generic / Shared [0] | .volatile | .volatile |
+ // | Relaxed | Yes | Global [0] | .volatile | .mmio.relaxed.sys (PTX 8.2+) or .volatile (PTX 8.1-) |
+ // | Relaxed | Yes | Local / Const / Param | plain [1] | .weak [1] |
+ // | Other | Yes | Generic / Shared / Global [0] | Error [2] | <atomic sem> [3] |
+
+ // Lowering of CUDA C++ SequentiallyConsistent Operations and Fences to PTX
+ // by following the ABI proven sound in:
+ // Lustig et al, A Formal Analysis of the NVIDIA PTX Memory Consistency Model, ASPLOS’19.
+ // https://dl.acm.org/doi/pdf/10.1145/3297858.3304043
+ //
+ // | CUDA C++ Atomic Operation or Atomic Fence | PTX Atomic Operation or Fence |
+ // |-----------------------------------------------------------------------------|-----------------------------------------|
+ // | cuda::atomic_thread_fence(memory_order_seq_cst, cuda::thread_scope_<scope>) | fence.sc.<scope>; |
+ // | cuda::atomic_load(memory_order_seq_cst, cuda::thread_scope_<scope>) | fence.sc.<scope>; ld.acquire.<scope>; |
+ // | cuda::atomic_store(memory_order_seq_cst, cuda::thread_scope_<scope>) | fence.sc.<scope>; st.release.<scope>; |
+ // | cuda::atomic_fetch_<op>(memory_order_seq_cst, cuda::thread_scope_<scope>) | fence.sc.<scope>; atom.acq_rel.<scope>; |
+
+ // clang-format on
+
+ // [0]: volatile and atomics are only supported on generic addressing to
+ // shared or global, or shared, or global.
+ // MMIO requires generic addressing to global or global, but
+ // (TODO) we only implement it for global.
+
+ // [1]: TODO: this implementation exhibits PTX Undefined Behavior; it
+ // fails to preserve the side-effects of atomics and volatile
+ // accesses in LLVM IR to local / const / param, causing
+ // well-formed LLVM-IR & CUDA C++ programs to be miscompiled
+ // in sm_70+.
+
+ if (CodeAddrSpace == NVPTX::PTXLdStInstCode::LOCAL ||
+ CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT ||
+ CodeAddrSpace == NVPTX::PTXLdStInstCode::PARAM) {
+ return NVPTX::Ordering::NotAtomic;
+ }
+
+ // [2]: Atomics with Ordering different than Relaxed are not supported on
+ // sm_60 and older; this includes volatile atomics.
+ if (!(Ordering == AtomicOrdering::NotAtomic ||
+ Ordering == AtomicOrdering::Monotonic) &&
+ !HasMemoryOrdering) {
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "PTX does not support \"atomic\" for orderings different than"
+ "\"NotAtomic\" or \"Monotonic\" for sm_60 or older, but order is: \""
+ << toIRString(Ordering) << "\".";
+ report_fatal_error(OS.str());
+ }
+
+ // [3]: TODO: these should eventually use .mmio<.atomic sem>; for now we drop
+ // the volatile semantics and preserve the atomic ones.
+
+ // PTX volatile and PTX atomics are not available for statespace that differ
+ // from .generic, .global, or .shared. The behavior of PTX volatile and PTX
+ // atomics is undefined if the generic address does not refer to a .global or
+ // .shared memory location.
+ bool AddrGenericOrGlobalOrShared =
+ (CodeAddrSpace == NVPTX::PTXLdStInstCode::GENERIC ||
+ CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL ||
+ CodeAddrSpace == NVPTX::PTXLdStInstCode::SHARED);
+ bool UseRelaxedMMIO =
+ HasRelaxedMMIO && CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL;
+
+ switch (Ordering) {
+ case AtomicOrdering::NotAtomic:
+ return N->isVolatile() && AddrGenericOrGlobalOrShared
+ ? NVPTX::Ordering::Volatile
+ : NVPTX::Ordering::NotAtomic;
+ case AtomicOrdering::Monotonic:
+ if (N->isVolatile())
+ return UseRelaxedMMIO ? NVPTX::Ordering::RelaxedMMIO
+ : AddrGenericOrGlobalOrShared ? NVPTX::Ordering::Volatile
+ : NVPTX::Ordering::NotAtomic;
+ else
+ return HasMemoryOrdering ? NVPTX::Ordering::Relaxed
+ : AddrGenericOrGlobalOrShared ? NVPTX::Ordering::Volatile
+ : NVPTX::Ordering::NotAtomic;
+ // case AtomicOrdering::Consume: // If LLVM ever provides this, lower it to
+ // Acquire.
+ case AtomicOrdering::Acquire:
+ if (!N->readMem()) {
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "PTX only supports Acquire Ordering on reads: "
+ << N->getOperationName();
+ N->print(OS);
+ report_fatal_error(OS.str());
+ }
+ return AddrGenericOrGlobalOrShared ? NVPTX::Ordering::Acquire
+ : NVPTX::Ordering::NotAtomic;
+ case AtomicOrdering::Release:
+ if (!N->writeMem()) {
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "PTX only supports Release Ordering on writes: "
+ << N->getOperationName();
+ N->print(OS);
+ report_fatal_error(OS.str());
+ }
+ return AddrGenericOrGlobalOrShared ? NVPTX::Ordering::Release
+ : NVPTX::Ordering::NotAtomic;
+ case AtomicOrdering::AcquireRelease: {
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "NVPTX does not support AcquireRelease Ordering on read-modify-write "
+ "yet and PTX does not support it on loads or stores: "
+ << N->getOperationName();
+ N->print(OS);
+ report_fatal_error(OS.str());
+ }
+ case AtomicOrdering::SequentiallyConsistent: {
+ // LLVM-IR SequentiallyConsistent atomics map to a two-instruction PTX
+ // sequence including a "fence.sc.sco" and the memory instruction with an
+ // Ordering that differs from "sc": acq, rel, or acq_rel, depending on
+ // whether the memory operation is a read, write, or read-modify-write.
+ //
+ // This sets the ordering of the fence to SequentiallyConsistent, and
+ // sets the corresponding ordering for the instruction.
+ NVPTX::Ordering InstrOrder;
+ if (N->readMem()) {
+ InstrOrder = NVPTX::Ordering::Acquire;
+ } else if (N->writeMem()) {
+ InstrOrder = NVPTX::Ordering::Release;
+ } else {
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "NVPTX does not support SequentiallyConsistent Ordering on "
+ "read-modify-writes yet: "
+ << N->getOperationName();
+ N->print(OS);
+ report_fatal_error(OS.str());
+ }
+ return AddrGenericOrGlobalOrShared
+ ? OperationOrderings(InstrOrder,
+ NVPTX::Ordering::SequentiallyConsistent)
+ : OperationOrderings(NVPTX::Ordering::NotAtomic);
+ }
+ case AtomicOrdering::Unordered:
+ break;
+ }
+
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "NVPTX backend does not support AtomicOrdering \""
+ << toIRString(Ordering) << "\" yet.";
+ report_fatal_error(OS.str());
+}
+
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
unsigned CodeAddrSpace, MachineFunction *F) {
// We use ldg (i.e. ld.global.nc) for invariant loads from the global address
@@ -902,32 +1079,19 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
if (!LoadedVT.isSimple())
return false;
- AtomicOrdering Ordering = LD->getSuccessOrdering();
- // In order to lower atomic loads with stronger guarantees we would need to
- // use load.acquire or insert fences. However these features were only added
- // with PTX ISA 6.0 / sm_70.
- // TODO: Check if we can actually use the new instructions and implement them.
- if (isStrongerThanMonotonic(Ordering))
- return false;
-
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
return tryLDGLDU(N);
}
+ // Memory Semantic Setting
+ auto [InstructionOrdering, FenceOrdering] =
+ getOperationOrderings(LD, Subtarget);
+
unsigned int PointerSize =
CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
- // Volatile Setting
- // - .volatile is only available for .global and .shared
- // - .volatile has the same memory synchronization semantics as .relaxed.sys
- bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
- if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
- CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
- CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
- isVolatile = false;
-
// Type Setting: fromType + fromTypeWidth
//
// Sign : ISD::SEXTLOAD
@@ -968,9 +1132,14 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), Addr, Chain };
+ SDValue Ops[] = {getI32Imm(FenceOrdering, dl),
+ getI32Imm(InstructionOrdering, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ Addr,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
: SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
@@ -979,9 +1148,15 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
+ SDValue Ops[] = {getI32Imm(FenceOrdering, dl),
+ getI32Imm(InstructionOrdering, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ Base,
+ Offset,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
: SelectADDRri(N1.getNode(), N1, Base, Offset)) {
@@ -996,9 +1171,15 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
+ SDValue Ops[] = {getI32Imm(FenceOrdering, dl),
+ getI32Imm(InstructionOrdering, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ Base,
+ Offset,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else {
if (PointerSize == 64)
@@ -1012,9 +1193,14 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), N1, Chain };
+ SDValue Ops[] = {getI32Imm(FenceOrdering, dl),
+ getI32Imm(InstructionOrdering, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ N1,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
}
@@ -1051,13 +1237,9 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
unsigned int PointerSize =
CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
- // Volatile Setting
- // - .volatile is only availalble for .global and .shared
- bool IsVolatile = MemSD->isVolatile();
- if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
- CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
- CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
- IsVolatile = false;
+ // Memory Semantic Setting
+ auto [InstructionOrdering, FenceOrdering] =
+ getOperationOrderings(MemSD, Subtarget);
// Vector Setting
MVT SimpleVT = LoadedVT.getSimpleVT();
@@ -1124,9 +1306,14 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL), Addr, Chain };
+ SDValue Ops[] = {getI32Imm(FenceOrdering, DL),
+ getI32Imm(InstructionOrdering, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL),
+ Addr,
+ Chain};
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else if (PointerSize == 64
? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
@@ -1149,9 +1336,15 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),...
[truncated]
|
@AlexMaclean @Artem-B @jlebar hello all three, I'm looking for some reviewers for this series of PRs to improve the support of atomics in LLVM NVPTX backend. It's my first time touching these parts of the LLVM project, but everyone in the LLVM Discord has been super nice helping me get onboarded. This PR includes all the changes of #96436 and #98022, which haven't really gotten any replies yet.. I just kept building on them. Not sure whether I should close them and continue building on them, or whether it is easier to review them piece meal. I'm happy to help review other people's PR and help with the NVPTX backend in general. I'm "gonzalob" in the LLVM Discord in case anyone wants to chat :) |
Sorry, I was out of office for a while. I'm catching up on the pending reviews now. Stay tuned. |
78e3b5e
to
2ecbb77
Compare
2ecbb77
to
25e94e7
Compare
Have rebased this on top of #99649 . This is now a simpler change on top of that one. |
696c71e
to
3fd60bd
Compare
Have rebased on top of the merged PRs, and extended the |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks OK overall, with some minor comments.
I'll be out till Tuesday, and will not be able to do the next review round until later next week.
2574b35
to
a3d9507
Compare
- Remove dead code in InstPrinter - Capitalization and improve Table comments - Update fence->membar lowering
a3d9507
to
c9a5dd8
Compare
llvm/lib/Target/NVPTX/NVPTX.h
Outdated
Release = 4, | ||
RelaxedMMIO = 5 | ||
// Consume = 3, // Unimplemented in LLVM; NVPTX would map to "Acquire" | ||
Acquire = 4, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need specific enum values? If not, drop them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we really really need them, but in the DAG they are just (i32 x)
nodes where x
is one of these values, and the values picked here match the ones of LLVM AtomicOrdering
for the cases in which both expose the same ordering, which makes it easy to interpret those DAG nodes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK. The only problem is that hardcoded numbers may eventually deviate from the values used by AtomicOrdering
.
Perhaps instead of the literal values, we should use AtomicOrdering
enum values, possibly with a set of static asserts ensuring that the values in this enum are strictly ordered, if we rely on that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good idea with the static_assert
s, that would prevent this from breaking.
Since we have some orderings that extend AtomicOrdering and we have to set LAST
, etc., I've left the integer values though. If a static_assert
fail, we'll need to update this properly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Few more minor nits, and I think we should be good to go.
llvm/lib/Target/NVPTX/NVPTX.h
Outdated
Release = 4, | ||
RelaxedMMIO = 5 | ||
// Consume = 3, // Unimplemented in LLVM; NVPTX would map to "Acquire" | ||
Acquire = 4, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK. The only problem is that hardcoded numbers may eventually deviate from the values used by AtomicOrdering
.
Perhaps instead of the literal values, we should use AtomicOrdering
enum values, possibly with a set of static asserts ensuring that the values in this enum are strictly ordered, if we rely on that.
✅ With the latest revision this PR passed the C/C++ code formatter. |
@Artem-B This is green! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Let's wait till Monday to land it.
} | ||
|
||
SDNode *NVPTXST = NVPTXST = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One assignment too much here?
gcc detects this and warns like
../lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp:1977:29: warning: operation on 'NVPTXST' may be undefined [-Wsequence-point]
1977 | SDNode *NVPTXST = NVPTXST =
| ~~~~~~~~^
1978 | CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Indeed, thank you for reporting this.
Fix here: #103045
Solves typo found by @mikaelholmen here: #98551 (comment)
This PR Builds on #98022 .
It adds support for Volta's SequentiallyConsistent Load and Store operations at system scope.