[NVPTX] Load/Store/Fence syncscope support #106101
Conversation
@llvm/pr-subscribers-backend-nvptx
Author: None (gonzalobg)
Changes: Adds "initial" support for syncscope to the NVPTX backend load/store/fence instructions.
Patch is 367.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106101.diff
14 Files Affected:
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 5b568b0487b45a..2a44ce0273ee1b 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -233,46 +233,68 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
auto Ordering = NVPTX::Ordering(Imm);
switch (Ordering) {
case NVPTX::Ordering::NotAtomic:
- break;
- case NVPTX::Ordering::Volatile:
- O << ".volatile";
- break;
+ return;
case NVPTX::Ordering::Relaxed:
- O << ".relaxed.sys";
- break;
+ O << ".relaxed";
+ return;
case NVPTX::Ordering::Acquire:
- O << ".acquire.sys";
- break;
+ O << ".acquire";
+ return;
case NVPTX::Ordering::Release:
- O << ".release.sys";
- break;
+ O << ".release";
+ return;
+ case NVPTX::Ordering::Volatile:
+ O << ".volatile";
+ return;
case NVPTX::Ordering::RelaxedMMIO:
- O << ".mmio.relaxed.sys";
- break;
+ O << ".mmio.relaxed";
+ return;
default:
report_fatal_error(formatv(
- "NVPTX LdStCode Printer does not support \"{}\" sem modifier.",
- OrderingToCString(Ordering)));
+ "NVPTX LdStCode Printer does not support \"{}\" sem modifier. "
+ "Loads/Stores cannot be AcquireRelease or SequentiallyConsistent.",
+ OrderingToString(Ordering)));
+ }
+ } else if (!strcmp(Modifier, "sco")) {
+ auto S = NVPTX::Scope(Imm);
+ switch (S) {
+ case NVPTX::Scope::Thread:
+ return;
+ case NVPTX::Scope::System:
+ O << ".sys";
+ return;
+ case NVPTX::Scope::Block:
+ O << ".cta";
+ return;
+ case NVPTX::Scope::Cluster:
+ O << ".cluster";
+ return;
+ case NVPTX::Scope::Device:
+ O << ".gpu";
+ return;
}
+ report_fatal_error(formatv(
+ "NVPTX LdStCode Printer does not support \"{}\" sco modifier.",
+ ScopeToString(S)));
} else if (!strcmp(Modifier, "addsp")) {
switch (Imm) {
case NVPTX::PTXLdStInstCode::GLOBAL:
O << ".global";
- break;
+ return;
case NVPTX::PTXLdStInstCode::SHARED:
O << ".shared";
- break;
+ return;
case NVPTX::PTXLdStInstCode::LOCAL:
O << ".local";
- break;
+ return;
case NVPTX::PTXLdStInstCode::PARAM:
O << ".param";
- break;
+ return;
case NVPTX::PTXLdStInstCode::CONSTANT:
O << ".const";
- break;
+ return;
case NVPTX::PTXLdStInstCode::GENERIC:
- break;
+ return;
default:
llvm_unreachable("Wrong Address Space");
}
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index f6f6acb9e13c90..b5624f9212ea27 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -117,12 +117,22 @@ enum Ordering : OrderingUnderlyingType {
// Consume = 3, // Unimplemented in LLVM; NVPTX would map to "Acquire"
Acquire = (OrderingUnderlyingType)AtomicOrdering::Acquire,
Release = (OrderingUnderlyingType)AtomicOrdering::Release,
- // AcquireRelease = 6, // TODO
+ AcquireRelease = (OrderingUnderlyingType)AtomicOrdering::AcquireRelease,
SequentiallyConsistent =
(OrderingUnderlyingType)AtomicOrdering::SequentiallyConsistent,
Volatile = SequentiallyConsistent + 1,
RelaxedMMIO = Volatile + 1,
- LAST = RelaxedMMIO
+ LASTORDERING = RelaxedMMIO
+};
+
+using ScopeUnderlyingType = unsigned int;
+enum Scope : ScopeUnderlyingType {
+ Thread = 0,
+ System = 1,
+ Block = 2,
+ Cluster = 3,
+ Device = 4,
+ LASTSCOPE = Device
};
namespace PTXLdStInstCode {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 4f0bc1a2044642..f04796fcdd49fe 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -59,6 +59,7 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<NVPTXSubtarget>();
+ Scopes = NVPTXScopes(MF.getFunction().getContext());
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -106,6 +107,10 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
if (tryStore(N))
return;
break;
+ case ISD::ATOMIC_FENCE:
+ if (tryFence(N))
+ return;
+ break;
case ISD::EXTRACT_VECTOR_ELT:
if (tryEXTRACT_VECTOR_ELEMENT(N))
return;
@@ -915,6 +920,42 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
} // namespace
+NVPTX::Scope NVPTXDAGToDAGISel::getOperationScope(MemSDNode *N,
+ NVPTX::Ordering Ord) const {
+ switch (Ord) {
+ case NVPTX::Ordering::NotAtomic:
+ case NVPTX::Ordering::Volatile: // Non-atomic volatile operations
+ // NVPTX uses Thread scope as the scope of non-atomic operations.
+ return NVPTX::Scope::Thread;
+ case NVPTX::Ordering::RelaxedMMIO:
+ // RelaxedMMIO operations are always system scope.
+ // If a RelaxedMMIO order was generated from an atomic volatile operation
+ // with a smaller thread scope, we bump it here to system scope.
+ return NVPTX::Scope::System;
+ case NVPTX::Ordering::Relaxed:
+ case NVPTX::Ordering::Acquire:
+ case NVPTX::Ordering::Release:
+ case NVPTX::Ordering::AcquireRelease:
+ case NVPTX::Ordering::SequentiallyConsistent:
+ auto S = Scopes[N->getSyncScopeID()];
+
+ // Atomic operations must have a scope greater than thread.
+ if (S == NVPTX::Scope::Thread)
+ report_fatal_error(
+ formatv("Atomics need scope > \"{}\".", ScopeToString(S)));
+
+ // If scope is cluster, clusters must be supported.
+ if (S == NVPTX::Scope::Cluster)
+ Subtarget->requireClusters("cluster scope");
+
+ // If operation is volatile, then its scope is system.
+ if (N->isVolatile())
+ S = NVPTX::Scope::System;
+
+ return S;
+ }
+}
+
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
unsigned CodeAddrSpace, MachineFunction *F) {
// We use ldg (i.e. ld.global.nc) for invariant loads from the global address
@@ -957,33 +998,86 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
});
}
-NVPTX::Ordering NVPTXDAGToDAGISel::insertMemoryInstructionFence(SDLoc DL,
- SDValue &Chain,
- MemSDNode *N) {
+static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S,
+ NVPTXSubtarget const *T) {
+ if (S == NVPTX::Scope::Cluster)
+ T->requireClusters(".cluster scope fence");
+
+ switch (O) {
+ case NVPTX::Ordering::Acquire:
+ case NVPTX::Ordering::Release:
+ case NVPTX::Ordering::AcquireRelease: {
+ switch (S) {
+ case NVPTX::Scope::System:
+ return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_sys
+ : NVPTX::INT_MEMBAR_SYS;
+ case NVPTX::Scope::Block:
+ return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_cta
+ : NVPTX::INT_MEMBAR_CTA;
+ case NVPTX::Scope::Cluster:
+ return NVPTX::atomic_thread_fence_acq_rel_cluster;
+ case NVPTX::Scope::Device:
+ return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_gpu
+ : NVPTX::INT_MEMBAR_GL;
+ case NVPTX::Scope::Thread:
+ report_fatal_error(
+ formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
+ ScopeToString(S)));
+ }
+ }
+ case NVPTX::Ordering::SequentiallyConsistent: {
+ switch (S) {
+ case NVPTX::Scope::System:
+ return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_sys
+ : NVPTX::INT_MEMBAR_SYS;
+ case NVPTX::Scope::Block:
+ return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_cta
+ : NVPTX::INT_MEMBAR_CTA;
+ case NVPTX::Scope::Cluster:
+ return NVPTX::atomic_thread_fence_seq_cst_cluster;
+ case NVPTX::Scope::Device:
+ return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_gpu
+ : NVPTX::INT_MEMBAR_GL;
+ case NVPTX::Scope::Thread:
+ report_fatal_error(formatv("Unsupported scope \"{}\" for seq_cst fence.",
+ ScopeToString(S)));
+ }
+ }
+ case NVPTX::Ordering::NotAtomic:
+ case NVPTX::Ordering::Relaxed:
+ case NVPTX::Ordering::Volatile:
+ case NVPTX::Ordering::RelaxedMMIO:
+ report_fatal_error(
+ formatv("Unsupported \"{}\" ordering and \"{}\" scope for fence.",
+ OrderingToString(O), ScopeToString(S)));
+ }
+}
+
+std::pair<NVPTX::Ordering, NVPTX::Scope>
+NVPTXDAGToDAGISel::insertMemoryInstructionFence(SDLoc DL, SDValue &Chain,
+ MemSDNode *N) {
// Some memory instructions - loads, stores, atomics - need an extra fence
// instruction. Get the memory order of the instruction, and that of its
// fence, if any.
auto [InstructionOrdering, FenceOrdering] =
getOperationOrderings(N, Subtarget);
+ auto Scope = getOperationScope(N, InstructionOrdering);
// If a fence is required before the operation, insert it:
switch (NVPTX::Ordering(FenceOrdering)) {
case NVPTX::Ordering::NotAtomic:
break;
case NVPTX::Ordering::SequentiallyConsistent: {
- unsigned Op = Subtarget->hasMemoryOrdering()
- ? NVPTX::atomic_thread_fence_seq_cst_sys
- : NVPTX::INT_MEMBAR_SYS;
+ auto Op = getFenceOp(FenceOrdering, Scope, Subtarget);
Chain = SDValue(CurDAG->getMachineNode(Op, DL, MVT::Other, Chain), 0);
break;
}
default:
report_fatal_error(
formatv("Unexpected fence ordering: \"{}\".",
- OrderingToCString(NVPTX::Ordering(FenceOrdering))));
+ OrderingToString(NVPTX::Ordering(FenceOrdering))));
}
-
- return InstructionOrdering;
+ return std::make_pair(InstructionOrdering, Scope);
}
bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
@@ -1154,7 +1248,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
- auto InstructionOrdering = insertMemoryInstructionFence(DL, Chain, LD);
+ auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, LD);
// Type Setting: fromType + fromTypeWidth
//
@@ -1189,7 +1283,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
std::optional<unsigned> Opcode;
MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
- SmallVector<SDValue, 12> Ops({getI32Imm(InstructionOrdering, DL),
+ SmallVector<SDValue, 12> Ops({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
getI32Imm(CodeAddrSpace, DL),
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL)});
@@ -1266,7 +1360,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
- auto InstructionOrdering = insertMemoryInstructionFence(DL, Chain, MemSD);
+ auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, MemSD);
// Vector Setting
MVT SimpleVT = LoadedVT.getSimpleVT();
@@ -1319,7 +1413,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
std::optional<unsigned> Opcode;
SDNode *LD;
- SmallVector<SDValue, 12> Ops({getI32Imm(InstructionOrdering, DL),
+ SmallVector<SDValue, 12> Ops({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
getI32Imm(CodeAddrSpace, DL),
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL)});
@@ -1895,7 +1989,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
SDLoc DL(N);
SDValue Chain = ST->getChain();
- auto InstructionOrdering = insertMemoryInstructionFence(DL, Chain, ST);
+ auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, ST);
// Vector Setting
MVT SimpleVT = StoreVT.getSimpleVT();
@@ -1923,10 +2017,10 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
MVT::SimpleValueType SourceVT =
Value.getNode()->getSimpleValueType(0).SimpleTy;
- SmallVector<SDValue, 12> Ops({Value, getI32Imm(InstructionOrdering, DL),
- getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(ToType, DL),
- getI32Imm(ToTypeWidth, DL)});
+ SmallVector<SDValue, 12> Ops(
+ {Value, getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
+ getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
+ getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});
if (SelectDirectAddr(BasePtr, Addr)) {
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
@@ -2005,7 +2099,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
- auto InstructionOrdering = insertMemoryInstructionFence(DL, Chain, MemSD);
+ auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, MemSD);
// Type Setting: toType + toTypeWidth
// - for integer type, always use 'u'
@@ -2044,9 +2138,9 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
ToTypeWidth = 32;
}
- Ops.append({getI32Imm(InstructionOrdering, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(ToType, DL),
- getI32Imm(ToTypeWidth, DL)});
+ Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
+ getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
+ getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});
if (SelectDirectAddr(N2, Addr)) {
switch (N->getOpcode()) {
@@ -4064,3 +4158,43 @@ unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
}
}
}
+
+bool NVPTXDAGToDAGISel::tryFence(SDNode *N) {
+ SDLoc DL(N);
+ assert(N->getOpcode() == ISD::ATOMIC_FENCE);
+ unsigned int FenceOp =
+ getFenceOp(NVPTX::Ordering(N->getConstantOperandVal(1)),
+ Scopes[N->getConstantOperandVal(2)], Subtarget);
+ SDValue Chain = N->getOperand(0);
+ SDNode *FenceNode = CurDAG->getMachineNode(FenceOp, DL, MVT::Other, Chain);
+ ReplaceNode(N, FenceNode);
+ return true;
+}
+
+NVPTXScopes::NVPTXScopes(LLVMContext &C) : CTX(&C) {
+ Scopes[C.getOrInsertSyncScopeID("singlethread")] = NVPTX::Scope::Thread;
+ Scopes[C.getOrInsertSyncScopeID("")] = NVPTX::Scope::System;
+ Scopes[C.getOrInsertSyncScopeID("block")] = NVPTX::Scope::Block;
+ Scopes[C.getOrInsertSyncScopeID("cluster")] = NVPTX::Scope::Cluster;
+ Scopes[C.getOrInsertSyncScopeID("device")] = NVPTX::Scope::Device;
+}
+
+NVPTX::Scope NVPTXScopes::operator[](SyncScope::ID ID) const {
+ if (Scopes.empty())
+ report_fatal_error("NVPTX Scopes must be initialized before calling "
+ "NVPTXScopes::operator[]");
+
+ auto S = Scopes.find(ID);
+ if (S == Scopes.end()) {
+ SmallVector<StringRef, 8> ScopeNames;
+ assert(CTX != nullptr && "CTX is nullptr");
+ CTX->getSyncScopeNames(ScopeNames);
+ StringRef Unknown{"unknown"};
+ auto Name = ID < ScopeNames.size() ? ScopeNames[ID] : Unknown;
+ report_fatal_error(
+ formatv("Could not find scope ID={} with name \"{}\".", int(ID), Name));
+ }
+ return S->second;
+}
+
+bool NVPTXScopes::empty() const { return Scopes.size() == 0; }
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index eac4056599511c..7eccf9e45314b1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -18,13 +18,26 @@
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
+struct NVPTXScopes {
+ NVPTXScopes() = default;
+ NVPTXScopes(LLVMContext &C);
+ NVPTX::Scope operator[](SyncScope::ID ID) const;
+ bool empty() const;
+
+private:
+ SmallMapVector<SyncScope::ID, NVPTX::Scope, 8> Scopes{};
+ LLVMContext *CTX = nullptr;
+};
+
class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
const NVPTXTargetMachine &TM;
@@ -38,6 +51,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool allowUnsafeFPMath() const;
bool doRsqrtOpt() const;
+ NVPTXScopes Scopes{};
+
public:
NVPTXDAGToDAGISel() = delete;
@@ -66,6 +81,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool tryLoadParam(SDNode *N);
bool tryStoreRetval(SDNode *N);
bool tryStoreParam(SDNode *N);
+ bool tryFence(SDNode *N);
void SelectAddrSpaceCast(SDNode *N);
bool tryTextureIntrinsic(SDNode *N);
bool trySurfaceIntrinsic(SDNode *N);
@@ -100,8 +116,9 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
static unsigned GetConvertOpcode(MVT DestTy, MVT SrcTy, LoadSDNode *N);
- NVPTX::Ordering insertMemoryInstructionFence(SDLoc DL, SDValue &Chain,
- MemSDNode *N);
+ std::pair<NVPTX::Ordering, NVPTX::Scope>
+ insertMemoryInstructionFence(SDLoc DL, SDValue &Chain, MemSDNode *N);
+ NVPTX::Scope getOperationScope(MemSDNode *N, NVPTX::Ordering O) const;
};
class NVPTXDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index b57c86fcf697cd..85876197331976 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2955,39 +2955,39 @@ foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
multiclass LD<NVPTXRegClass regclass> {
def _avar : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$sco, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr),
- "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${sco:sco}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr];", []>;
def _areg : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$sco, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr),
- "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${sco:sco}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr];", []>;
def _areg_64 : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$sco, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr),
- "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${sco:sco}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr];", []>;
def _ari : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$sco, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
- "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${sco:sco}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr+$offset];", []>;
def _ari_64 : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec,
+ (ins LdStCode:$sem, LdStCode:$sco, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
- "ld${sem:sem}${a...
[truncated]
LGTM, thanks
✅ With the latest revision this PR passed the C/C++ code formatter.
Patch looks OK overall, modulo a few cosmetic improvements that could be extracted into separate patches.
Have added the two. For different subsequent PRs:
Ping to merge (I still can't). Would be good to merge this early in the week in case there are any regressions.
Adds "initial" support for
syncscope
to the NVPTX backendload
/store
/fence
instructions.Atomic Read-Modify-Write operations intentionally not supported as part of this initial PR.
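As a rough illustration (not part of the patch), this is the kind of LLVM IR the new lowering is meant to handle; the function name and pointer argument are placeholders, and the scope strings are the ones registered by NVPTXScopes in the diff above:

; Minimal sketch, assuming the syncscope names ("block", "device", "cluster")
; registered in NVPTXScopes; @example and %p are hypothetical.
define i32 @example(ptr %p) {
entry:
  ; acquire load at block (CTA) scope -> expected .acquire.cta qualifiers
  %v = load atomic i32, ptr %p syncscope("block") acquire, align 4
  ; release store at device (GPU) scope -> expected .release.gpu qualifiers
  store atomic i32 %v, ptr %p syncscope("device") release, align 4
  ; seq_cst fence at cluster scope, selected via tryFence/getFenceOp
  fence syncscope("cluster") seq_cst
  ret i32 %v
}

Based on the printer changes above, the ordering and scope should now be emitted as separate qualifiers (e.g. .acquire plus .cta) rather than the previously hard-coded .sys suffix.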