From 08cd286c569c14ddd3826ced4ccbac7260744439 Mon Sep 17 00:00:00 2001
From: Gonzalo Brito Gadeschi
Date: Mon, 15 Jul 2024 12:23:44 -0700
Subject: [PATCH 1/3] [NVPTX] Support fence instruction

---
Review notes (text after the three-dash line is not part of the commit message):
  * Purpose of the series: lower system-scope `fence` to fence.acq_rel.sys /
    fence.sc.sys when PTX >= 6.0 and SM >= 70 are available, and to
    membar.sys otherwise.
  * fence.ll enables the CHECK prefix next to SM60/SM70 on both FileCheck
    RUN lines so that the `CHECK-LABEL` directives are enforced; FileCheck
    skips directives whose prefix is not enabled.
  * fence.ll ends with a trailing newline.
  * The `index` blob id of fence.ll below was not regenerated for the two
    test edits above.
  * The SM60 prefix is attached to the -mcpu=sm_20 run, i.e. it covers the
    membar.sys fallback path.
  * The unpredicated fallback patterns match the same DAGs as the predicated
    ones; presumably the predicated patterns are preferred on PTX >= 6.0 /
    SM >= 70 (the sm_70 RUN line expects fence.*) -- TODO confirm the
    selection priority.
  * Every pattern matches the scope operand (i64 1) only, annotated
    "sys(1)"; whether other sync scopes need lowering is not visible from
    this series -- TODO confirm.

 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 29 ++++++++++++++++++++
 llvm/test/CodeGen/NVPTX/fence.ll        | 36 +++++++++++++++++++++++++
 2 files changed, 65 insertions(+)
 create mode 100644 llvm/test/CodeGen/NVPTX/fence.ll

diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index cd17a9de541ad..2ea69ef697c19 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -3908,3 +3908,32 @@ def : Pat <
   (V2I32toI64
     (INT_NVVM_PRMT (I64toI32H Int64Regs:$a), (i32 0), (i32 0x0123)),
     (INT_NVVM_PRMT (I64toI32L Int64Regs:$a), (i32 0), (i32 0x0123)))>;
+
+
+////////////////////////////////////////////////////////////////////////////////
+// PTX Fence instructions
+////////////////////////////////////////////////////////////////////////////////
+
+def atomic_thread_fence_seq_cst_sys :
+  NVPTXInst<(outs), (ins), "fence.sc.sys;", []>,
+  Requires<[hasPTX<60>, hasSM<70>]>;
+def atomic_thread_fence_acq_rel_sys :
+  NVPTXInst<(outs), (ins), "fence.acq_rel.sys;", []>,
+  Requires<[hasPTX<60>, hasSM<70>]>;
+
+def atomic_thread_fence_seq_cst_sys_membar :
+  NVPTXInst<(outs), (ins), "membar.sys;", []>;
+
+def : Pat<(atomic_fence (i64 4), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // acquire(4) sys(1)
+  Requires<[hasPTX<60>, hasSM<70>]>;
+def : Pat<(atomic_fence (i64 5), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // release(5) sys(1)
+  Requires<[hasPTX<60>, hasSM<70>]>;
+def : Pat<(atomic_fence (i64 6), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // acq_rel(6) sys(1)
+  Requires<[hasPTX<60>, hasSM<70>]>;
+def : Pat<(atomic_fence (i64 7), (i64 1)), (atomic_thread_fence_seq_cst_sys)>, // seq_cst(7) sys(1)
+  Requires<[hasPTX<60>, hasSM<70>]>;
+
+def : Pat<(atomic_fence (i64 4), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // acquire(4) sys(1)
+def : Pat<(atomic_fence (i64 5), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // release(5) sys(1)
+def : Pat<(atomic_fence (i64 6), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // acq_rel(6) sys(1)
+def : Pat<(atomic_fence (i64 7), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // seq_cst(7) sys(1)
diff --git a/llvm/test/CodeGen/NVPTX/fence.ll b/llvm/test/CodeGen/NVPTX/fence.ll
new file mode 100644
index 0000000000000..d3aace95e9665
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/fence.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=CHECK,SM60
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s --check-prefixes=CHECK,SM70
+; RUN: %if ptxas-12.2 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %}
+
+; CHECK-LABEL: fence_sc_sys
+define void @fence_sc_sys() local_unnamed_addr {
+  ; SM60: membar.sys
+  ; SM70: fence.sc.sys
+  fence seq_cst
+  ret void
+}
+
+; CHECK-LABEL: fence_acq_rel_sys
+define void @fence_acq_rel_sys() local_unnamed_addr {
+  ; SM60: membar.sys
+  ; SM70: fence.acq_rel.sys
+  fence acq_rel
+  ret void
+}
+
+; CHECK-LABEL: fence_release_sys
+define void @fence_release_sys() local_unnamed_addr {
+  ; SM60: membar.sys
+  ; SM70: fence.acq_rel.sys
+  fence release
+  ret void
+}
+
+; CHECK-LABEL: fence_acquire_sys
+define void @fence_acquire_sys() local_unnamed_addr {
+  ; SM60: membar.sys
+  ; SM70: fence.acq_rel.sys
+  fence acquire
+  ret void
+}

From ee55b37dc19a34cc7f013eeebb57957f579c505b Mon Sep 17 00:00:00 2001
From: Gonzalo Brito Gadeschi
Date: Tue, 16 Jul 2024 07:00:13 -0700
Subject: [PATCH 2/3] Fixup: use INT_MEMBAR_SYS instead of redefining the membar.sys opcode

---
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 2ea69ef697c19..8db2c1d0db20c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -3921,9 +3921,6 @@ def atomic_thread_fence_acq_rel_sys :
   NVPTXInst<(outs), (ins), "fence.acq_rel.sys;", []>,
   Requires<[hasPTX<60>, hasSM<70>]>;
 
-def atomic_thread_fence_seq_cst_sys_membar :
-  NVPTXInst<(outs), (ins), "membar.sys;", []>;
-
 def : Pat<(atomic_fence (i64 4), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // acquire(4) sys(1)
   Requires<[hasPTX<60>, hasSM<70>]>;
 def : Pat<(atomic_fence (i64 5), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // release(5) sys(1)
@@ -3933,7 +3930,7 @@ def : Pat<(atomic_fence (i64 6), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, /
 def : Pat<(atomic_fence (i64 7), (i64 1)), (atomic_thread_fence_seq_cst_sys)>, // seq_cst(7) sys(1)
   Requires<[hasPTX<60>, hasSM<70>]>;
 
-def : Pat<(atomic_fence (i64 4), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // acquire(4) sys(1)
-def : Pat<(atomic_fence (i64 5), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // release(5) sys(1)
-def : Pat<(atomic_fence (i64 6), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // acq_rel(6) sys(1)
-def : Pat<(atomic_fence (i64 7), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // seq_cst(7) sys(1)
+def : Pat<(atomic_fence (i64 4), (i64 1)), (INT_MEMBAR_SYS)>; // acquire(4) sys(1)
+def : Pat<(atomic_fence (i64 5), (i64 1)), (INT_MEMBAR_SYS)>; // release(5) sys(1)
+def : Pat<(atomic_fence (i64 6), (i64 1)), (INT_MEMBAR_SYS)>; // acq_rel(6) sys(1)
+def : Pat<(atomic_fence (i64 7), (i64 1)), (INT_MEMBAR_SYS)>; // seq_cst(7) sys(1)

From 4985c5601c600369339355c700752002b51c2bfa Mon Sep 17 00:00:00 2001
From: gonzalobg <65027571+gonzalobg@users.noreply.github.com>
Date: Fri, 19 Jul 2024 23:57:15 +0200
Subject: [PATCH 3/3] [NVPTX] Add comment to clarify pre sm70 behavior

---
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 8db2c1d0db20c..f82013b63dc6a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -3930,6 +3930,8 @@ def : Pat<(atomic_fence (i64 6), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, /
 def : Pat<(atomic_fence (i64 7), (i64 1)), (atomic_thread_fence_seq_cst_sys)>, // seq_cst(7) sys(1)
   Requires<[hasPTX<60>, hasSM<70>]>;
 
+
+// If PTX<60 or SM<70, we fall back to MEMBAR:
 def : Pat<(atomic_fence (i64 4), (i64 1)), (INT_MEMBAR_SYS)>; // acquire(4) sys(1)
 def : Pat<(atomic_fence (i64 5), (i64 1)), (INT_MEMBAR_SYS)>; // release(5) sys(1)
 def : Pat<(atomic_fence (i64 6), (i64 1)), (INT_MEMBAR_SYS)>; // acq_rel(6) sys(1)