|
15 | 15 | ; Compilation command: |
16 | 16 | ; clang -cc1 -triple spir-unknown-unknown -O0 -cl-std=CL2.0 -emit-llvm kernel_query.cl |
17 | 17 |
|
18 | | -; RUN: llvm-as -opaque-pointers=0 %s -o %t.bc |
19 | | -; RUN: llvm-spirv %t.bc -opaque-pointers=0 -spirv-text -o %t.spv.txt |
| 18 | +; RUN: llvm-as %s -o %t.bc |
| 19 | +; RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt |
20 | 20 | ; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV |
21 | | -; RUN: llvm-spirv %t.bc -opaque-pointers=0 -o %t.spv |
| 21 | +; RUN: llvm-spirv %t.bc -o %t.spv |
22 | 22 | ; RUN: llvm-spirv -r -emit-opaque-pointers %t.spv -o %t.rev.bc |
23 | 23 | ; RUN: llvm-dis %t.rev.bc |
24 | 24 | ; RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM |
@@ -68,119 +68,115 @@ entry: |
68 | 68 |
|
69 | 69 | %ndrange = alloca %struct.ndrange_t, align 4 |
70 | 70 |
|
71 | | -; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit1Tmp:[0-9]+]] [[BlockGlb1]] |
72 | | -; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]] [[BlockLit1Tmp]] |
| 71 | +; CHECK-SPIRV: PtrCastToGeneric {{[0-9]+}} [[BlockLit1Tmp:[0-9]+]] [[BlockGlb1]] |
| 72 | +; CHECK-SPIRV: Bitcast [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]] [[BlockLit1Tmp]] |
73 | 73 | ; CHECK-SPIRV: GetKernelWorkGroupSize [[Int32Ty]] {{[0-9]+}} [[BlockKer1]] [[BlockLit1]] [[ConstInt8]] [[ConstInt8]] |
74 | 74 |
|
75 | 75 | ; CHECK-LLVM: call i32 @__get_kernel_work_group_size_impl(ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}) |
76 | 76 |
|
77 | | - %0 = call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) |
| 77 | + %0 = call i32 @__get_kernel_work_group_size_impl(ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke_kernel to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @__block_literal_global to ptr addrspace(4))) |
78 | 78 |
|
79 | | -; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit2Tmp:[0-9]+]] [[BlockGlb2]] |
80 | | -; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit2:[0-9]+]] [[BlockLit2Tmp]] |
| 79 | +; CHECK-SPIRV: PtrCastToGeneric {{[0-9]+}} [[BlockLit2Tmp:[0-9]+]] [[BlockGlb2]] |
| 80 | +; CHECK-SPIRV: Bitcast [[Int8PtrGenTy]] [[BlockLit2:[0-9]+]] [[BlockLit2Tmp]] |
81 | 81 | ; CHECK-SPIRV: GetKernelPreferredWorkGroupSizeMultiple [[Int32Ty]] {{[0-9]+}} [[BlockKer2]] [[BlockLit2]] [[ConstInt8]] [[ConstInt8]] |
82 | 82 |
|
83 | 83 | ; CHECK-LLVM: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}) #1 |
84 | 84 |
|
85 | | - %1 = call i32 @__get_kernel_preferred_work_group_size_multiple_impl(i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*)) |
| 85 | + %1 = call i32 @__get_kernel_preferred_work_group_size_multiple_impl(ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke_2_kernel to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @__block_literal_global.1 to ptr addrspace(4))) |
86 | 86 |
|
87 | | -; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb3]] |
88 | | -; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit3:[0-9]+]] [[BlockLit3Tmp]] |
| 87 | +; CHECK-SPIRV: PtrCastToGeneric {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb3]] |
| 88 | +; CHECK-SPIRV: Bitcast [[Int8PtrGenTy]] [[BlockLit3:[0-9]+]] [[BlockLit3Tmp]] |
89 | 89 | ; CHECK-SPIRV: GetKernelNDrangeMaxSubGroupSize [[Int32Ty]] {{[0-9]+}} [[NDRange]] [[BlockKer3]] [[BlockLit3]] [[ConstInt8]] [[ConstInt8]] |
90 | 90 |
|
91 | 91 | ; CHECK-LLVM: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(ptr {{.*}}, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}) |
92 | 92 |
|
93 | | - %2 = call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.2 to i8 addrspace(1)*) to i8 addrspace(4)*)) |
| 93 | + %2 = call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(ptr %ndrange, ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke_3_kernel to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @__block_literal_global.2 to ptr addrspace(4))) |
94 | 94 |
|
95 | | -; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit4Tmp:[0-9]+]] [[BlockGlb4]] |
96 | | -; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit4:[0-9]+]] [[BlockLit4Tmp]] |
| 95 | +; CHECK-SPIRV: PtrCastToGeneric {{[0-9]+}} [[BlockLit4Tmp:[0-9]+]] [[BlockGlb4]] |
| 96 | +; CHECK-SPIRV: Bitcast [[Int8PtrGenTy]] [[BlockLit4:[0-9]+]] [[BlockLit4Tmp]] |
97 | 97 | ; CHECK-SPIRV: GetKernelNDrangeSubGroupCount [[Int32Ty]] {{[0-9]+}} [[NDRange]] [[BlockKer4]] [[BlockLit4]] [[ConstInt8]] [[ConstInt8]] |
98 | 98 |
|
99 | 99 | ; CHECK-LLVM: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(ptr {{.*}}, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}) |
100 | 100 |
|
101 | | - %3 = call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.3 to i8 addrspace(1)*) to i8 addrspace(4)*)) |
| 101 | + %3 = call i32 @__get_kernel_sub_group_count_for_ndrange_impl(ptr %ndrange, ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke_4_kernel to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @__block_literal_global.3 to ptr addrspace(4))) |
102 | 102 | ret void |
103 | 103 | } |
104 | 104 |
|
105 | 105 | ; Function Attrs: convergent noinline nounwind optnone |
106 | | -define internal spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %.block_descriptor) #1 { |
| 106 | +define internal spir_func void @__device_side_enqueue_block_invoke(ptr addrspace(4) %.block_descriptor) #1 { |
107 | 107 | entry: |
108 | | - %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 |
109 | | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 |
110 | | - store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 |
111 | | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* |
112 | | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 |
| 108 | + %.block_descriptor.addr = alloca ptr addrspace(4), align 4 |
| 109 | + %block.addr = alloca ptr addrspace(4), align 4 |
| 110 | + store ptr addrspace(4) %.block_descriptor, ptr %.block_descriptor.addr, align 4 |
| 111 | + store ptr addrspace(4) %.block_descriptor, ptr %block.addr, align 4 |
113 | 112 | ret void |
114 | 113 | } |
115 | 114 |
|
116 | 115 | ; Function Attrs: nounwind |
117 | | -define internal spir_kernel void @__device_side_enqueue_block_invoke_kernel(i8 addrspace(4)*) #2 { |
| 116 | +define internal spir_kernel void @__device_side_enqueue_block_invoke_kernel(ptr addrspace(4)) #2 { |
118 | 117 | entry: |
119 | | - call void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %0) |
| 118 | + call void @__device_side_enqueue_block_invoke(ptr addrspace(4) %0) |
120 | 119 | ret void |
121 | 120 | } |
122 | 121 |
|
123 | | -declare i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)*, i8 addrspace(4)*) |
| 122 | +declare i32 @__get_kernel_work_group_size_impl(ptr addrspace(4), ptr addrspace(4)) |
124 | 123 |
|
125 | 124 | ; Function Attrs: convergent noinline nounwind optnone |
126 | | -define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #1 { |
| 125 | +define internal spir_func void @__device_side_enqueue_block_invoke_2(ptr addrspace(4) %.block_descriptor) #1 { |
127 | 126 | entry: |
128 | | - %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 |
129 | | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 |
130 | | - store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 |
131 | | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* |
132 | | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 |
| 127 | + %.block_descriptor.addr = alloca ptr addrspace(4), align 4 |
| 128 | + %block.addr = alloca ptr addrspace(4), align 4 |
| 129 | + store ptr addrspace(4) %.block_descriptor, ptr %.block_descriptor.addr, align 4 |
| 130 | + store ptr addrspace(4) %.block_descriptor, ptr %block.addr, align 4 |
133 | 131 | ret void |
134 | 132 | } |
135 | 133 |
|
136 | 134 | ; Function Attrs: nounwind |
137 | | -define internal spir_kernel void @__device_side_enqueue_block_invoke_2_kernel(i8 addrspace(4)*) #2 { |
| 135 | +define internal spir_kernel void @__device_side_enqueue_block_invoke_2_kernel(ptr addrspace(4)) #2 { |
138 | 136 | entry: |
139 | | - call void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %0) |
| 137 | + call void @__device_side_enqueue_block_invoke_2(ptr addrspace(4) %0) |
140 | 138 | ret void |
141 | 139 | } |
142 | 140 |
|
143 | | -declare i32 @__get_kernel_preferred_work_group_size_multiple_impl(i8 addrspace(4)*, i8 addrspace(4)*) |
| 141 | +declare i32 @__get_kernel_preferred_work_group_size_multiple_impl(ptr addrspace(4), ptr addrspace(4)) |
144 | 142 |
|
145 | 143 | ; Function Attrs: convergent noinline nounwind optnone |
146 | | -define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspace(4)* %.block_descriptor) #1 { |
| 144 | +define internal spir_func void @__device_side_enqueue_block_invoke_3(ptr addrspace(4) %.block_descriptor) #1 { |
147 | 145 | entry: |
148 | | - %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 |
149 | | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 |
150 | | - store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 |
151 | | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* |
152 | | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 |
| 146 | + %.block_descriptor.addr = alloca ptr addrspace(4), align 4 |
| 147 | + %block.addr = alloca ptr addrspace(4), align 4 |
| 148 | + store ptr addrspace(4) %.block_descriptor, ptr %.block_descriptor.addr, align 4 |
| 149 | + store ptr addrspace(4) %.block_descriptor, ptr %block.addr, align 4 |
153 | 150 | ret void |
154 | 151 | } |
155 | 152 |
|
156 | 153 | ; Function Attrs: nounwind |
157 | | -define internal spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*) #2 { |
| 154 | +define internal spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(ptr addrspace(4)) #2 { |
158 | 155 | entry: |
159 | | - call void @__device_side_enqueue_block_invoke_3(i8 addrspace(4)* %0) |
| 156 | + call void @__device_side_enqueue_block_invoke_3(ptr addrspace(4) %0) |
160 | 157 | ret void |
161 | 158 | } |
162 | 159 |
|
163 | | -declare i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t*, i8 addrspace(4)*, i8 addrspace(4)*) |
| 160 | +declare i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(ptr, ptr addrspace(4), ptr addrspace(4)) |
164 | 161 |
|
165 | 162 | ; Function Attrs: convergent noinline nounwind optnone |
166 | | -define internal spir_func void @__device_side_enqueue_block_invoke_4(i8 addrspace(4)* %.block_descriptor) #1 { |
| 163 | +define internal spir_func void @__device_side_enqueue_block_invoke_4(ptr addrspace(4) %.block_descriptor) #1 { |
167 | 164 | entry: |
168 | | - %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 |
169 | | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 |
170 | | - store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 |
171 | | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* |
172 | | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 |
| 165 | + %.block_descriptor.addr = alloca ptr addrspace(4), align 4 |
| 166 | + %block.addr = alloca ptr addrspace(4), align 4 |
| 167 | + store ptr addrspace(4) %.block_descriptor, ptr %.block_descriptor.addr, align 4 |
| 168 | + store ptr addrspace(4) %.block_descriptor, ptr %block.addr, align 4 |
173 | 169 | ret void |
174 | 170 | } |
175 | 171 |
|
176 | 172 | ; Function Attrs: nounwind |
177 | | -define internal spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*) #2 { |
| 173 | +define internal spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(ptr addrspace(4)) #2 { |
178 | 174 | entry: |
179 | | - call void @__device_side_enqueue_block_invoke_4(i8 addrspace(4)* %0) |
| 175 | + call void @__device_side_enqueue_block_invoke_4(ptr addrspace(4) %0) |
180 | 176 | ret void |
181 | 177 | } |
182 | 178 |
|
183 | | -declare i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t*, i8 addrspace(4)*, i8 addrspace(4)*) |
| 179 | +declare i32 @__get_kernel_sub_group_count_for_ndrange_impl(ptr, ptr addrspace(4), ptr addrspace(4)) |
184 | 180 |
|
185 | 181 | ; CHECK-SPIRV-DAG: Function [[VoidTy]] [[BlockKer1]] 0 [[BlockKerTy]] |
186 | 182 | ; CHECK-SPIRV-DAG: Function [[VoidTy]] [[BlockKer2]] 0 [[BlockKerTy]] |
|
0 commit comments