Skip to content

Commit 08cd286

Browse files
committed
[NVPTX] Support fence instruction
1 parent d36edf8 commit 08cd286

File tree

2 files changed

+65
-0
lines changed

2 files changed

+65
-0
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3908,3 +3908,32 @@ def : Pat <
39083908
(V2I32toI64
39093909
(INT_NVVM_PRMT (I64toI32H Int64Regs:$a), (i32 0), (i32 0x0123)),
39103910
(INT_NVVM_PRMT (I64toI32L Int64Regs:$a), (i32 0), (i32 0x0123)))>;
3911+
3912+
3913+
////////////////////////////////////////////////////////////////////////////////
3914+
// PTX Fence instructions
3915+
////////////////////////////////////////////////////////////////////////////////
3916+
3917+
// Instruction record with no outputs and no inputs that prints "fence.sc.sys;".
// The pattern list is empty, so selection is done by separate Pat records.
// Only available when both hasPTX<60> and hasSM<70> hold.
def atomic_thread_fence_seq_cst_sys :
3918+
NVPTXInst<(outs), (ins), "fence.sc.sys;", []>,
3919+
Requires<[hasPTX<60>, hasSM<70>]>;
3920+
// Instruction record with no outputs and no inputs that prints
// "fence.acq_rel.sys;". The pattern list is empty, so selection is done by
// separate Pat records. Only available when both hasPTX<60> and hasSM<70> hold.
def atomic_thread_fence_acq_rel_sys :
3921+
NVPTXInst<(outs), (ins), "fence.acq_rel.sys;", []>,
3922+
Requires<[hasPTX<60>, hasSM<70>]>;
3923+
3924+
// Instruction record with no outputs and no inputs that prints "membar.sys;".
// Unlike the fence.* records it carries no Requires<> predicate.
// Despite the "seq_cst" in its name, the patterns in this file select it for
// the acquire, release, acq_rel and seq_cst orderings alike.
def atomic_thread_fence_seq_cst_sys_membar :
3925+
NVPTXInst<(outs), (ins), "membar.sys;", []>;
3926+
3927+
// Selection patterns guarded by hasPTX<60> and hasSM<70>.
// atomic_fence operands are (ordering, scope). Orderings 4, 5 and 6 with
// scope 1 all select the fence.acq_rel.sys record; ordering 7 with scope 1
// selects the fence.sc.sys record.
def : Pat<(atomic_fence (i64 4), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // acquire(4) sys(1)
3928+
Requires<[hasPTX<60>, hasSM<70>]>;
3929+
def : Pat<(atomic_fence (i64 5), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // release(5) sys(1)
3930+
Requires<[hasPTX<60>, hasSM<70>]>;
3931+
def : Pat<(atomic_fence (i64 6), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // acq_rel(6) sys(1)
3932+
Requires<[hasPTX<60>, hasSM<70>]>;
3933+
def : Pat<(atomic_fence (i64 7), (i64 1)), (atomic_thread_fence_seq_cst_sys)>, // seq_cst(7) sys(1)
3934+
Requires<[hasPTX<60>, hasSM<70>]>;
3935+
3936+
// Unpredicated patterns: the same four (ordering, scope) combinations all
// select the membar.sys record, regardless of ordering.
// NOTE(review): these match exactly the same nodes as the Requires<>-guarded
// fence.* patterns earlier in this section. Presumably the guarded patterns
// win when their predicates hold and these act as the fallback otherwise;
// confirm how the selector prioritises the two sets (nothing here makes the
// priority explicit).
def : Pat<(atomic_fence (i64 4), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // acquire(4) sys(1)
3937+
def : Pat<(atomic_fence (i64 5), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // release(5) sys(1)
3938+
def : Pat<(atomic_fence (i64 6), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // acq_rel(6) sys(1)
3939+
def : Pat<(atomic_fence (i64 7), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // seq_cst(7) sys(1)

llvm/test/CodeGen/NVPTX/fence.ll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=CHECK,SM60
2+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
3+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s --check-prefixes=CHECK,SM70
4+
; RUN: %if ptxas-12.2 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %}
; Both FileCheck invocations enable the common CHECK prefix in addition to
; their per-target prefix. Without it the per-function LABEL directives in
; this file are ignored and the per-target expectations are not anchored to
; the function they belong to. The SM60 prefix covers the sm_20 run (the
; membar.sys lowering); the SM70 prefix covers the sm_70 + ptx60 run.
5+
6+
; A `fence seq_cst` with no syncscope. The sm_20 run (SM60 prefix) expects
; membar.sys; the sm_70 + ptx60 run (SM70 prefix) expects fence.sc.sys.
; CHECK-LABEL: fence_sc_sys
7+
define void @fence_sc_sys() local_unnamed_addr {
8+
; SM60: membar.sys
9+
; SM70: fence.sc.sys
10+
fence seq_cst
11+
ret void
12+
}
13+
14+
; A `fence acq_rel` with no syncscope. The sm_20 run (SM60 prefix) expects
; membar.sys; the sm_70 + ptx60 run (SM70 prefix) expects fence.acq_rel.sys.
; CHECK-LABEL: fence_acq_rel_sys
15+
define void @fence_acq_rel_sys() local_unnamed_addr {
16+
; SM60: membar.sys
17+
; SM70: fence.acq_rel.sys
18+
fence acq_rel
19+
ret void
20+
}
21+
22+
; A `fence release` with no syncscope. The sm_20 run (SM60 prefix) expects
; membar.sys; the sm_70 + ptx60 run (SM70 prefix) expects fence.acq_rel.sys,
; i.e. the same instruction as for acq_rel.
; CHECK-LABEL: fence_release_sys
23+
define void @fence_release_sys() local_unnamed_addr {
24+
; SM60: membar.sys
25+
; SM70: fence.acq_rel.sys
26+
fence release
27+
ret void
28+
}
29+
30+
; A `fence acquire` with no syncscope. The sm_20 run (SM60 prefix) expects
; membar.sys; the sm_70 + ptx60 run (SM70 prefix) expects fence.acq_rel.sys,
; i.e. the same instruction as for acq_rel.
; CHECK-LABEL: fence_acquire_sys
31+
define void @fence_acquire_sys() local_unnamed_addr {
32+
; SM60: membar.sys
33+
; SM70: fence.acq_rel.sys
34+
fence acquire
35+
ret void
36+
}

0 commit comments

Comments
 (0)