diff --git a/lib/Compiler.cpp b/lib/Compiler.cpp index c32216293..12aeefb45 100644 --- a/lib/Compiler.cpp +++ b/lib/Compiler.cpp @@ -525,6 +525,10 @@ int RunPassPipeline(llvm::Module &M, llvm::raw_svector_ostream *binaryStream) { pm.addPass(clspv::AutoPodArgsPass()); pm.addPass(clspv::DeclarePushConstantsPass()); pm.addPass(clspv::DefineOpenCLWorkItemBuiltinsPass()); + // Replace the LLVM intrinsics. This will give them a chance to be better + // optimized through the pipeline. It also helps with generic address space + // lowering. + pm.addPass(clspv::ReplaceLLVMIntrinsicsPass()); // RewritePackedStructsPass will rewrite packed struct types, and // ReplacePointerBitcastPass will lower the new packed struct type. So, diff --git a/test/CPlusPlus/issue-357.cl b/test/CPlusPlus/issue-357.cl index cb908880f..a74d274a0 100644 --- a/test/CPlusPlus/issue-357.cl +++ b/test/CPlusPlus/issue-357.cl @@ -62,6 +62,11 @@ __kernel void testCopyInstance2(__global InstanceTest* dst) __local InstanceTest instances[16]; const size_t index = get_global_id(0); + + if (index < 16) { + instances[index].init(); + } + if (index < 16) { dst[index] = instances[index]; } diff --git a/test/LLVMIntrinsics/descend_into_array.cl b/test/LLVMIntrinsics/descend_into_array.cl index 7aa6ec1bf..ca22cf93d 100644 --- a/test/LLVMIntrinsics/descend_into_array.cl +++ b/test/LLVMIntrinsics/descend_into_array.cl @@ -18,7 +18,7 @@ __kernel void foo(__global outer* out, global outer* in) { // CHECK: [[zero:%[0-9a-zA-Z_]+]] = OpConstant {{.*}} 0 // CHECK: [[dst:%[0-9a-zA-Z_]+]] = OpVariable {{.*}} StorageBuffer // CHECK: [[src:%[0-9a-zA-Z_]+]] = OpVariable {{.*}} StorageBuffer +// CHECK: [[dst_gep:%[0-9a-zA-Z_]+]] = OpAccessChain {{.*}} [[dst]] [[zero]] [[zero]] // CHECK: [[src_gep:%[0-9a-zA-Z_]+]] = OpAccessChain {{.*}} [[src]] [[zero]] [[zero]] // CHECK: OpLoad {{.*}} [[src_gep]] -// CHECK: [[dst_gep:%[0-9a-zA-Z_]+]] = OpAccessChain {{.*}} [[dst]] [[zero]] [[zero]] // CHECK: OpStore [[dst_gep]] diff --git a/test/LLVMIntrinsics/issue-1355.cl b/test/LLVMIntrinsics/issue-1355.cl new file mode 100644 index 000000000..482dd7d48 --- /dev/null +++ b/test/LLVMIntrinsics/issue-1355.cl @@ -0,0 +1,17 @@ +// RUN: clspv %target %s -o %t.spv -cl-std=CLC++ -inline-entry-points +// RUN: spirv-dis -o %t2.spvasm %t.spv +// RUN: FileCheck %s < %t2.spvasm +// RUN: spirv-val --target-env vulkan1.0 %t.spv + +// CHECK: [[src_ptr:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_StorageBuffer_uint +// CHECK: [[dst_ptr:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_StorageBuffer_uint +// CHECK: [[data:%[a-zA-Z0-9_]+]] = OpLoad %uint [[src_ptr]] +// CHECK: OpStore [[dst_ptr]] [[data]] + +kernel void kern(global int *ptr, global int *out) { + int priv = {}; + int *ppriv = &priv; + int *pglob = out; + __builtin_memcpy(pglob, ptr, 1 * sizeof(int)); + __builtin_memcpy(ppriv, ptr, 1 * sizeof(int)); +} diff --git a/test/LLVMIntrinsics/memcpy_from_constant.cl b/test/LLVMIntrinsics/memcpy_from_constant.cl index de0061f35..a6d34d5f1 100644 --- a/test/LLVMIntrinsics/memcpy_from_constant.cl +++ b/test/LLVMIntrinsics/memcpy_from_constant.cl @@ -16,6 +16,8 @@ void kernel memcpy_from_constant(global float* result) { // CHECK-DAG: [[ptr_ssbo_struct:%[a-zA-Z0-9_]+]] = OpTypePointer StorageBuffer [[struct]] // CHECK-DAG: [[uint:%[a-zA-Z0-9_]+]] = OpTypeInt 32 0 // CHECK-DAG: [[ptr_ssbo_float:%[a-zA-Z0-9_]+]] = OpTypePointer StorageBuffer [[float]] +// CHECK-DAG: [[uint_5:%[a-zA-Z0-9_]+]] = OpConstant [[uint]] 5 +// CHECK-DAG: [[arr_float:%[a-zA-Z0-9_]+]] = OpTypeArray [[float]] [[uint_5]] // CHECK-DAG: [[uint_0:%[a-zA-Z0-9_]+]] = OpConstant [[uint]] 0 // CHECK-DAG: [[uint_4:%[a-zA-Z0-9_]+]] = OpConstant [[uint]] 4 // CHECK-DAG: [[uint_1:%[a-zA-Z0-9_]+]] = OpConstant [[uint]] 1 @@ -27,14 +29,26 @@ void kernel memcpy_from_constant(global float* result) { // CHECK-DAG: [[float_0:%[a-zA-Z0-9_]+]] = OpConstant [[float]] 0 // CHECK-DAG: [[float_1:%[a-zA-Z0-9_]+]] = OpConstant [[float]] 1 // CHECK-DAG: [[float_2:%[a-zA-Z0-9_]+]] = OpConstant [[float]] 2 +// CHECK-DAG: [[arr_float_5:%[a-zA-Z0-9_]+]] = OpConstantComposite [[arr_float]] [[float_n2]] [[float_n1]] [[float_0]] [[float_1]] [[float_2]] + + +// CHECK: [[gep0:%[a-zA-Z0-9_]+]] = OpCompositeExtract [[float]] [[arr_float_5]] 0 +// CHECK: [[gep1:%[a-zA-Z0-9_]+]] = OpCompositeExtract [[float]] [[arr_float_5]] 1 +// CHECK: [[gep2:%[a-zA-Z0-9_]+]] = OpCompositeExtract [[float]] [[arr_float_5]] 2 +// CHECK: [[gep3:%[a-zA-Z0-9_]+]] = OpCompositeExtract [[float]] [[arr_float_5]] 3 +// CHECK: [[gep4:%[a-zA-Z0-9_]+]] = OpCompositeExtract [[float]] [[arr_float_5]] 4 // CHECK: [[ssbo_gep0:%[a-zA-Z0-9_]+]] = OpAccessChain [[ptr_ssbo_float]] [[ssbo]] [[uint_0]] [[uint_0]] -// CHECK: OpStore [[ssbo_gep0]] [[float_n2]] +// CHECK: OpStore [[ssbo_gep0]] [[gep0]] + // CHECK: [[ssbo_gep1:%[a-zA-Z0-9_]+]] = OpAccessChain [[ptr_ssbo_float]] [[ssbo]] [[uint_0]] [[uint_1]] -// CHECK: OpStore [[ssbo_gep1]] [[float_n1]] +// CHECK: OpStore [[ssbo_gep1]] [[gep1]] + // CHECK: [[ssbo_gep2:%[a-zA-Z0-9_]+]] = OpAccessChain [[ptr_ssbo_float]] [[ssbo]] [[uint_0]] [[uint_2]] -// CHECK: OpStore [[ssbo_gep2]] [[float_0]] +// CHECK: OpStore [[ssbo_gep2]] [[gep2]] + // CHECK: [[ssbo_gep3:%[a-zA-Z0-9_]+]] = OpAccessChain [[ptr_ssbo_float]] [[ssbo]] [[uint_0]] [[uint_3]] -// CHECK: OpStore [[ssbo_gep3]] [[float_1]] +// CHECK: OpStore [[ssbo_gep3]] [[gep3]] + // CHECK: [[ssbo_gep4:%[a-zA-Z0-9_]+]] = OpAccessChain [[ptr_ssbo_float]] [[ssbo]] [[uint_0]] [[uint_4]] -// CHECK: OpStore [[ssbo_gep4]] [[float_2]] +// CHECK: OpStore [[ssbo_gep4]] [[gep4]]