diff --git a/src/layer/vulkan/shader/unaryop.comp b/src/layer/vulkan/shader/unaryop.comp index bd16cc17b171..524e911fe8fa 100644 --- a/src/layer/vulkan/shader/unaryop.comp +++ b/src/layer/vulkan/shader/unaryop.comp @@ -4,39 +4,25 @@ #version 450 layout (constant_id = 0) const int op_type = 0; +layout (constant_id = 1) const uint n = 0; -#define shape_constant_id_offset 1 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; +layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (push_constant) uniform parameter { - int dims; - int w; - int h; - int c; - int cstep; + uint n; } p; void main() { - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); + const uint gi = gl_GlobalInvocationID.x; - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) + if (gi >= psc(n)) return; - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afp v = buffer_ld1(bottom_top_blob_data, gi); + afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - afp res; + afpvec4 res; if (op_type == 0) res = abs(v); if (op_type == 1) res = -v; @@ -63,5 +49,5 @@ void main() if (op_type == 18) res = round(v); if (op_type == 19) res = trunc(v); - buffer_st1(bottom_top_blob_data, gi, res); + buffer_st4(bottom_top_blob_data, gi, res); } diff --git a/src/layer/vulkan/shader/unaryop_pack4.comp b/src/layer/vulkan/shader/unaryop_pack4.comp deleted file mode 100644 index 7c1f7c181137..000000000000 --- a/src/layer/vulkan/shader/unaryop_pack4.comp +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2019 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const int op_type = 0; - -#define shape_constant_id_offset 1 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - - afpvec4 res; - - if (op_type == 0) res = abs(v); - if (op_type == 1) res = -v; - if (op_type == 2) res = floor(v); - if (op_type == 3) res = ceil(v); - if (op_type == 4) res = v * v; - if (op_type == 5) res = sqrt(v); - if (op_type == 6) res = inversesqrt(v); - if (op_type == 7) res = exp(v); - if (op_type == 8) res = log(v); - if (op_type == 9) res = sin(v); - if (op_type == 10) res = cos(v); - if (op_type == 11) res = tan(v); - if (op_type == 12) res = asin(v); - if (op_type == 13) res = acos(v); - if (op_type == 14) res = atan(v); - if (op_type == 15) res = afp(1.f) / v; -#if NCNN_moltenvk - if (op_type == 16) res = afpvec4(tanh(vec4(v))); -#else - if (op_type == 16) res = tanh(v); -#endif - if (op_type == 17) res = log(v) * afp(0.434294481903); - if (op_type == 18) res = round(v); - if (op_type == 19) res = trunc(v); - - buffer_st4(bottom_top_blob_data, gi, res); -} diff --git a/src/layer/vulkan/shader/unaryop_pack8.comp b/src/layer/vulkan/shader/unaryop_pack8.comp deleted file mode 100644 index dbf56b04e94e..000000000000 --- a/src/layer/vulkan/shader/unaryop_pack8.comp +++ /dev/null @@ -1,148 +0,0 @@ -// Copyright 2020 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const int op_type = 0; - -#define shape_constant_id_offset 1 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); - - afpvec8 res; - - if (op_type == 0) - { - res[0] = abs(v[0]); - res[1] = abs(v[1]); - } - if (op_type == 1) - { - res[0] = -v[0]; - res[1] = -v[1]; - } - if (op_type == 2) - { - res[0] = floor(v[0]); - res[1] = floor(v[1]); - } - if (op_type == 3) - { - res[0] = ceil(v[0]); - res[1] = ceil(v[1]); - } - if (op_type == 4) - { - res[0] = v[0] * v[0]; - res[1] = v[1] * v[1]; - } - if (op_type == 5) - { - res[0] = sqrt(v[0]); - res[1] = sqrt(v[1]); - } - if (op_type == 6) - { - res[0] = inversesqrt(v[0]); - res[1] = inversesqrt(v[1]); - } - if (op_type == 7) - { - res[0] = exp(v[0]); - res[1] = exp(v[1]); - } - if (op_type == 8) - { - res[0] = log(v[0]); - res[1] = log(v[1]); - } - if (op_type == 9) - { - res[0] = sin(v[0]); - res[1] = sin(v[1]); - } - if (op_type == 10) - { - res[0] = cos(v[0]); - res[1] = cos(v[1]); - } - if (op_type == 11) - { - res[0] = tan(v[0]); - res[1] = tan(v[1]); - } - if (op_type == 12) - { - res[0] = asin(v[0]); - res[1] = asin(v[1]); - } - if (op_type == 13) - { - res[0] = acos(v[0]); - res[1] = acos(v[1]); - } - if (op_type == 14) - { - res[0] = atan(v[0]); - res[1] = atan(v[1]); - } - if (op_type == 15) - { - res[0] = afp(1.f) / v[0]; - res[1] = afp(1.f) / v[1]; - } - if (op_type == 16) - { -#if NCNN_moltenvk - res[0] = afpvec4(tanh(vec4(v[0]))); - res[1] = afpvec4(tanh(vec4(v[1]))); -#else - res[0] = tanh(v[0]); - res[1] = tanh(v[1]); -#endif - } - if (op_type == 17) - { - res[0] = log(v[0]) * afp(0.434294481903); - res[1] = log(v[1]) * afp(0.434294481903); - } - if (op_type == 18) - { - res[0] = round(v[0]); - res[1] = round(v[1]); - } - if (op_type == 19) - { - res[0] = trunc(v[0]); - res[1] = trunc(v[1]); - } - - buffer_st8(bottom_top_blob_data, gi, res); -} diff --git a/src/layer/vulkan/unaryop_vulkan.cpp b/src/layer/vulkan/unaryop_vulkan.cpp index 95654694c6a5..a191fe1b9561 100644 --- a/src/layer/vulkan/unaryop_vulkan.cpp +++ b/src/layer/vulkan/unaryop_vulkan.cpp @@ -12,92 +12,21 @@ UnaryOp_vulkan::UnaryOp_vulkan() support_vulkan = true; pipeline_unaryop = 0; - pipeline_unaryop_pack4 = 0; - pipeline_unaryop_pack8 = 0; } int UnaryOp_vulkan::create_pipeline(const Option& opt) { const Mat& shape = top_shapes.empty() ? Mat() : top_shapes[0]; - int elempack = 1; - if (shape.dims == 1) elempack = opt.use_shader_pack8 && shape.w % 8 == 0 ? 8 : shape.w % 4 == 0 ? 4 : 1; - if (shape.dims == 2) elempack = opt.use_shader_pack8 && shape.h % 8 == 0 ? 8 : shape.h % 4 == 0 ? 4 : 1; - if (shape.dims == 3 || shape.dims == 4) elempack = opt.use_shader_pack8 && shape.c % 8 == 0 ? 8 : shape.c % 4 == 0 ? 4 : 1; - - size_t elemsize; - if (opt.use_fp16_storage || opt.use_fp16_packed) - { - elemsize = elempack * 2u; - } - else - { - elemsize = elempack * 4u; - } - - Mat shape_packed; - if (shape.dims == 1) shape_packed = Mat(shape.w / elempack, (void*)0, elemsize, elempack); - if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack); - if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); - if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); - - std::vector specializations(1 + 5); + std::vector specializations(2); specializations[0].i = op_type; - specializations[1 + 0].i = shape_packed.dims; - specializations[1 + 1].i = shape_packed.w; - specializations[1 + 2].i = shape_packed.h * shape_packed.d; - specializations[1 + 3].i = shape_packed.c; - specializations[1 + 4].i = shape_packed.cstep; - - Mat local_size_xyz; - if (shape_packed.dims == 1) - { - local_size_xyz.w = std::min(64, shape_packed.w); - local_size_xyz.h = 1; - local_size_xyz.c = 1; - } - if (shape_packed.dims == 2) - { - local_size_xyz.w = std::min(8, shape_packed.w); - local_size_xyz.h = std::min(8, shape_packed.h); - local_size_xyz.c = 1; - } - if (shape_packed.dims == 3) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h); - local_size_xyz.c = std::min(4, shape_packed.c); - } - if (shape_packed.dims == 4) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); - local_size_xyz.c = std::min(4, shape_packed.c); - } - - // pack1 - if (shape.dims == 0 || elempack == 1) - { - pipeline_unaryop = new Pipeline(vkdev); - pipeline_unaryop->set_optimal_local_size_xyz(local_size_xyz); - pipeline_unaryop->create(LayerShaderType::unaryop, opt, specializations); - } - - // pack4 - if (shape.dims == 0 || elempack == 4) - { - pipeline_unaryop_pack4 = new Pipeline(vkdev); - pipeline_unaryop_pack4->set_optimal_local_size_xyz(local_size_xyz); - pipeline_unaryop_pack4->create(LayerShaderType::unaryop_pack4, opt, specializations); - } - - // pack8 - if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) - { - pipeline_unaryop_pack8 = new Pipeline(vkdev); - pipeline_unaryop_pack8->set_optimal_local_size_xyz(local_size_xyz); - pipeline_unaryop_pack8->create(LayerShaderType::unaryop_pack8, opt, specializations); - } + specializations[1].u32 = shape.total() / 4; + + const int local_size_x = vkdev->info.subgroup_size(); + + pipeline_unaryop = new Pipeline(vkdev); + pipeline_unaryop->set_optimal_local_size_xyz(local_size_x, 1, 1); + pipeline_unaryop->create(LayerShaderType::unaryop, opt, specializations); return 0; } @@ -107,34 +36,24 @@ int UnaryOp_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_unaryop; pipeline_unaryop = 0; - delete pipeline_unaryop_pack4; - pipeline_unaryop_pack4 = 0; - - delete pipeline_unaryop_pack8; - pipeline_unaryop_pack8 = 0; - return 0; } int UnaryOp_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const { - int elempack = bottom_top_blob.elempack; + const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; std::vector bindings(1); bindings[0] = bottom_top_blob; - std::vector constants(5); - constants[0].i = bottom_top_blob.dims; - constants[1].i = bottom_top_blob.w; - constants[2].i = bottom_top_blob.h * bottom_top_blob.d; - constants[3].i = bottom_top_blob.c; - constants[4].i = bottom_top_blob.cstep; - - const Pipeline* pipeline = elempack == 8 ? pipeline_unaryop_pack8 - : elempack == 4 ? pipeline_unaryop_pack4 - : pipeline_unaryop; + std::vector constants(1); + constants[0].u32 = n; - cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); + VkMat dispatcher; + dispatcher.w = n; + dispatcher.h = 1; + dispatcher.c = 1; + cmd.record_pipeline(pipeline_unaryop, bindings, constants, dispatcher); return 0; } diff --git a/src/layer/vulkan/unaryop_vulkan.h b/src/layer/vulkan/unaryop_vulkan.h index 8d10501e0659..9487eae7dea3 100644 --- a/src/layer/vulkan/unaryop_vulkan.h +++ b/src/layer/vulkan/unaryop_vulkan.h @@ -21,8 +21,6 @@ class UnaryOp_vulkan : public UnaryOp public: Pipeline* pipeline_unaryop; - Pipeline* pipeline_unaryop_pack4; - Pipeline* pipeline_unaryop_pack8; }; } // namespace ncnn