@@ -715,12 +715,12 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
 }
 
 struct OperationOrderings {
-  NVPTX::OrderingUnderlyingType instr_ordering;
-  NVPTX::OrderingUnderlyingType fence_ordering;
+  NVPTX::OrderingUnderlyingType InstrOrdering;
+  NVPTX::OrderingUnderlyingType FenceOrdering;
   OperationOrderings(NVPTX::Ordering o = NVPTX::Ordering::NotAtomic,
                      NVPTX::Ordering f = NVPTX::Ordering::NotAtomic)
-      : instr_ordering(static_cast<NVPTX::OrderingUnderlyingType>(o)),
-        fence_ordering(static_cast<NVPTX::OrderingUnderlyingType>(f)) {}
+      : InstrOrdering(static_cast<NVPTX::OrderingUnderlyingType>(o)),
+        FenceOrdering(static_cast<NVPTX::OrderingUnderlyingType>(f)) {}
 };
 
 static OperationOrderings
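For reference, a minimal standalone sketch of the struct as it reads after this rename. The NVPTX::Ordering enum and OrderingUnderlyingType below are abbreviated stand-ins for illustration (the backend's real enum has more members), and main() is only a usage example, not backend code.

#include <cassert>
#include <cstdint>

// Abbreviated stand-ins for the NVPTX definitions the struct depends on.
namespace NVPTX {
enum class Ordering : uint8_t {
  NotAtomic,
  Acquire,
  Release,
  SequentiallyConsistent
};
using OrderingUnderlyingType = uint8_t;
} // namespace NVPTX

// The struct as renamed above: the ordering applied to the memory instruction
// itself, plus the ordering of any fence emitted alongside it.
struct OperationOrderings {
  NVPTX::OrderingUnderlyingType InstrOrdering;
  NVPTX::OrderingUnderlyingType FenceOrdering;

  OperationOrderings(NVPTX::Ordering o = NVPTX::Ordering::NotAtomic,
                     NVPTX::Ordering f = NVPTX::Ordering::NotAtomic)
      : InstrOrdering(static_cast<NVPTX::OrderingUnderlyingType>(o)),
        FenceOrdering(static_cast<NVPTX::OrderingUnderlyingType>(f)) {}
};

int main() {
  // Default construction marks both the instruction and the fence non-atomic.
  OperationOrderings Plain;
  assert(Plain.InstrOrdering ==
         static_cast<NVPTX::OrderingUnderlyingType>(NVPTX::Ordering::NotAtomic));

  // A seq_cst load lowers to an acquire instruction plus a seq_cst fence.
  OperationOrderings SeqCstLoad(NVPTX::Ordering::Acquire,
                                NVPTX::Ordering::SequentiallyConsistent);
  (void)SeqCstLoad;
  return 0;
}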
@@ -758,12 +758,19 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
   // Lustig et al, A Formal Analysis of the NVIDIA PTX Memory Consistency Model, ASPLOS’19.
   // https://dl.acm.org/doi/pdf/10.1145/3297858.3304043
   //
-  // | CUDA C++ Atomic Operation or Atomic Fence                                   | PTX Atomic Operation or Fence           |
-  // |------------------------------------------------------------------------------|-----------------------------------------|
-  // | cuda::atomic_thread_fence(memory_order_seq_cst, cuda::thread_scope_<scope>)  | fence.sc.<scope>;                       |
-  // | cuda::atomic_load(memory_order_seq_cst, cuda::thread_scope_<scope>)          | fence.sc.<scope>; ld.acquire.<scope>;   |
-  // | cuda::atomic_store(memory_order_seq_cst, cuda::thread_scope_<scope>)         | fence.sc.<scope>; st.release.<scope>;   |
-  // | cuda::atomic_fetch_<op>(memory_order_seq_cst, cuda::thread_scope_<scope>)    | fence.sc.<scope>; atom.acq_rel.<scope>; |
+  // | CUDA C++ Atomic Operation or Atomic Fence            | PTX Atomic Operation or Fence |
+  // |-------------------------------------------------------|-------------------------------|
+  // | cuda::atomic_thread_fence                             | fence.sc.<scope>;             |
+  // |   (memory_order_seq_cst, cuda::thread_scope_<scope>)  |                               |
+  // |-------------------------------------------------------|-------------------------------|
+  // | cuda::atomic_load                                     | fence.sc.<scope>;             |
+  // |   (memory_order_seq_cst, cuda::thread_scope_<scope>)  | ld.acquire.<scope>;           |
+  // |-------------------------------------------------------|-------------------------------|
+  // | cuda::atomic_store                                    | fence.sc.<scope>;             |
+  // |   (memory_order_seq_cst, cuda::thread_scope_<scope>)  | st.release.<scope>;           |
+  // |-------------------------------------------------------|-------------------------------|
+  // | cuda::atomic_fetch_<op>                               | fence.sc.<scope>;             |
+  // |   (memory_order_seq_cst, cuda::thread_scope_<scope>)  | atom.acq_rel.<scope>;         |
 
   // clang-format on
 
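As a rough illustration of what the reformatted table encodes, every seq_cst CUDA C++ atomic operation splits into a fence.sc.<scope> followed by a weaker-ordered PTX instruction. The enum and the ptxFor helper below are made up for this example and are not part of the backend.

#include <cstdio>

// Hypothetical helper mapping each seq_cst CUDA C++ operation kind to the PTX
// sequence listed in the comment table above.
enum class SeqCstOp { Fence, Load, Store, Rmw };

static const char *ptxFor(SeqCstOp Op) {
  switch (Op) {
  case SeqCstOp::Fence:
    return "fence.sc.<scope>;";
  case SeqCstOp::Load:
    return "fence.sc.<scope>; ld.acquire.<scope>;";
  case SeqCstOp::Store:
    return "fence.sc.<scope>; st.release.<scope>;";
  case SeqCstOp::Rmw:
    return "fence.sc.<scope>; atom.acq_rel.<scope>;";
  }
  return "";
}

int main() {
  // Prints the PTX sequence the table associates with a seq_cst atomic load.
  std::printf("%s\n", ptxFor(SeqCstOp::Load));
  return 0;
}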
@@ -892,11 +899,11 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
   //
   // This sets the ordering of the fence to SequentiallyConsistent, and
   // sets the corresponding ordering for the instruction.
-  NVPTX::Ordering ord;
+  NVPTX::Ordering InstrOrder;
   if (N->readMem()) {
-    ord = NVPTX::Ordering::Acquire;
+    InstrOrder = NVPTX::Ordering::Acquire;
   } else if (N->writeMem()) {
-    ord = NVPTX::Ordering::Release;
+    InstrOrder = NVPTX::Ordering::Release;
   } else {
     SmallString<256> Msg;
     raw_svector_ostream OS(Msg);
@@ -907,7 +914,7 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
     report_fatal_error(OS.str());
   }
   return AddrGenericOrGlobalOrShared
-             ? OperationOrderings(ord,
+             ? OperationOrderings(InstrOrder,
                                   NVPTX::Ordering::SequentiallyConsistent)
             : OperationOrderings(NVPTX::Ordering::NotAtomic);
 }
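To read the control flow above in isolation: loads take an acquire instruction, stores a release instruction, and either is paired with a sequentially consistent fence only when the address is generic/global/shared; otherwise the operation is treated as not atomic. Below is a minimal sketch of that selection under the assumption that the MemSDNode queries are reduced to two booleans. The name pickSeqCstOrderings and its parameters are illustrative, not the backend's API, and an exception stands in for report_fatal_error.

#include <stdexcept>
#include <utility>

// Abbreviated stand-in for the backend's ordering enum.
namespace NVPTX {
enum class Ordering { NotAtomic, Acquire, Release, SequentiallyConsistent };
} // namespace NVPTX

// Returns {instruction ordering, fence ordering}, mirroring the branch above.
static std::pair<NVPTX::Ordering, NVPTX::Ordering>
pickSeqCstOrderings(bool IsLoad, bool IsStore,
                    bool AddrGenericOrGlobalOrShared) {
  NVPTX::Ordering InstrOrder;
  if (IsLoad)
    InstrOrder = NVPTX::Ordering::Acquire;
  else if (IsStore)
    InstrOrder = NVPTX::Ordering::Release;
  else
    // The real code calls report_fatal_error for a seq_cst node that neither
    // reads nor writes memory; an exception stands in for that here.
    throw std::logic_error("seq_cst operation neither reads nor writes memory");

  return AddrGenericOrGlobalOrShared
             ? std::make_pair(InstrOrder,
                              NVPTX::Ordering::SequentiallyConsistent)
             : std::make_pair(NVPTX::Ordering::NotAtomic,
                              NVPTX::Ordering::NotAtomic);
}

int main() {
  // A seq_cst load from a generic/global/shared address becomes an acquire
  // load preceded by a seq_cst fence.
  auto [Instr, Fence] = pickSeqCstOrderings(/*IsLoad=*/true, /*IsStore=*/false,
                                            /*AddrGenericOrGlobalOrShared=*/true);
  return (Instr == NVPTX::Ordering::Acquire &&
          Fence == NVPTX::Ordering::SequentiallyConsistent)
             ? 0
             : 1;
}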