diff --git a/backends/vulkan/op_registry.py b/backends/vulkan/op_registry.py index f2b80c2e544..5aa805dc1b3 100644 --- a/backends/vulkan/op_registry.py +++ b/backends/vulkan/op_registry.py @@ -528,8 +528,6 @@ def register_view_op(features: OpFeatures): exir_ops.edge.aten.index_select.default, exir_ops.edge.aten.select_copy.int, # Tensor combination - exir_ops.edge.aten.split_with_sizes_copy.default, - exir_ops.edge.aten.split.Tensor, exir_ops.edge.aten.repeat.default, # Tensor creation exir_ops.edge.aten.arange.start_step, @@ -563,6 +561,8 @@ def register_ported_op(features: OpFeatures): exir_ops.edge.aten.permute_copy.default, # Tensor combination exir_ops.edge.aten.cat.default, + exir_ops.edge.aten.split_with_sizes_copy.default, + exir_ops.edge.aten.split.Tensor, ] ) def register_ported_op_all_packed_dims(features: OpFeatures): diff --git a/backends/vulkan/runtime/graph/ops/impl/Split.cpp b/backends/vulkan/runtime/graph/ops/impl/Split.cpp index b74317b078e..8002dadc538 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Split.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Split.cpp @@ -25,8 +25,6 @@ void add_split_with_sizes_default_node( ValueRef out_list_ref) { vTensorPtr t_in = graph.get_tensor(in); - VK_CHECK_COND(check_packed_dim_is(*t_in, WHCN::kChannelsDim)); - ValueListPtr out_list = graph.get_value_list(out_list_ref); DimIndex dim_index = normalize_to_dim_index(*t_in, dim); @@ -38,62 +36,60 @@ void add_split_with_sizes_default_node( ValueRef out_ref = (*out_list)[split_idx]; vTensorPtr t_out = graph.get_tensor(out_ref); - VK_CHECK_COND(check_packed_dim_is(*t_out, WHCN::kChannelsDim)); VK_CHECK_COND(dim_at(*t_out, dim_index) == split_size); } - if (dim_index == kWidth4D) { - utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false); - utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false); + const auto packed_dim = t_in->packed_dim(); + const auto packed_dim_index = static_cast<DimIndex>(kWidth4D - packed_dim); - for (ValueRef out_ref : 
*out_list) { - // Doesn't need to use split_size since we have already verified that the - // output tensor's size matches with the split_size. - vTensorPtr t_out = graph.get_tensor(out_ref); - utils::ivec3 range = t_out->logical_limits(); - add_copy_offset_node( - graph, in, range, src_offset, dst_offset, out_ref, false, true); + // Index of dimension to be split in (w, h, c * b) coordinate system + const auto dim_xyz_index = std::min(2, -dim_index - 1); - src_offset[0] += range[0]; - } - } else if (dim_index == kHeight4D) { - utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false); - utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false); + utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false); + utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false); - for (ValueRef out_ref : *out_list) { - vTensorPtr t_out = graph.get_tensor(out_ref); - utils::ivec3 range = t_out->logical_limits(); - add_copy_offset_node( - graph, in, range, src_offset, dst_offset, out_ref, false, true); + const bool is_splitting_channel = (dim_index == kChannel4D); - src_offset[1] += range[1]; - } - } else if (dim_index == kBatch4D) { - utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false); - utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false); + // if splitting channels + if (is_splitting_channel) { + // set source offset w as channel size of the input tensor + src_offset[3] = dim_at(t_in->sizes(), kChannel4D); + } - for (ValueRef out_ref : *out_list) { - vTensorPtr t_out = graph.get_tensor(out_ref); - utils::ivec3 range = t_out->logical_limits(); + for (ValueRef out_ref : *out_list) { + // Doesn't need to use split_size since we have already verified that the + // output tensor's size matches with the split_size. 
+ vTensorPtr t_out = graph.get_tensor(out_ref); + const auto out_channel_size = dim_at(t_out->sizes(), kChannel4D); + utils::ivec3 range = t_out->logical_limits(); + + if (dim_index == packed_dim_index) { + // if splitting channels, use add_copy_channel_offset_node function as + // add_copy_packed_dim_offset_node does not support channel packing + if (is_splitting_channel) { + add_copy_channel_offset_node( + graph, in, out_channel_size, src_offset[2], dst_offset[2], out_ref); + src_offset[dim_xyz_index] += out_channel_size; + } else { + // dst_offset[3] is not used now but will be used in the future when + // add_copy_packed_dim_offset_node will support channel packing + // + // set destination offset w as channel size of the output tensor if + // splitting channel + dst_offset[3] = is_splitting_channel ? out_channel_size : 0; + add_copy_packed_dim_offset_node( + graph, in, range, src_offset, dst_offset, out_ref); + src_offset[dim_xyz_index] += dim_at(t_out->sizes(), packed_dim_index); + } + } else { + // set destination offset w as channel size of the output tensor if + // splitting channels + dst_offset[3] = is_splitting_channel ? out_channel_size : 0; add_copy_offset_node( graph, in, range, src_offset, dst_offset, out_ref, false, true); - - src_offset[2] += range[2]; - } - } else if (dim_index == kChannel4D) { - int32_t src_offset = 0; - int32_t dst_offset = 0; - - for (ValueRef out_ref : *out_list) { - vTensorPtr t_out = graph.get_tensor(out_ref); - int32_t range = dim_at<kChannel4D>(t_out->sizes()); - add_copy_channel_offset_node( - graph, in, range, src_offset, dst_offset, out_ref); - src_offset += range; + src_offset[dim_xyz_index] += + is_splitting_channel ? 
out_channel_size : range[dim_xyz_index]; } - - } else { - VK_THROW("not ipmlemented"); } } diff --git a/backends/vulkan/test/op_tests/cases.py b/backends/vulkan/test/op_tests/cases.py index e4f7ac15434..41d8edf1f25 100644 --- a/backends/vulkan/test/op_tests/cases.py +++ b/backends/vulkan/test/op_tests/cases.py @@ -922,14 +922,20 @@ def get_split_with_sizes_inputs(): Test = namedtuple("VkSliceTest", ["self", "sizes", "dim"]) test_cases = [ # Split on Width + Test(self=(S1, 7, 10, 11), sizes=[1, 3, 2, 5], dim=3), Test(self=(S1, 7, 10, 10), sizes=[1, 2, 3, 4], dim=3), + Test(self=(7, 10, 11), sizes=[1, 3, 2, 5], dim=2), Test(self=(7, 10, 10), sizes=[1, 2, 3, 4], dim=2), + Test(self=(7, 10, 11), sizes=[3, 8], dim=2), Test(self=(7, 10, 10), sizes=[1, 9], dim=2), Test(self=(10, 10), sizes=[1, 9], dim=1), Test(self=(10,), sizes=[1, 9], dim=0), # Split on Height + Test(self=(S1, 7, 11, 10), sizes=[1, 3, 2, 5], dim=2), Test(self=(S1, 7, 10, 10), sizes=[1, 2, 3, 4], dim=2), + Test(self=(7, 11, 10), sizes=[1, 3, 2, 5], dim=1), Test(self=(7, 10, 10), sizes=[1, 2, 3, 4], dim=1), + Test(self=(7, 11, 11), sizes=[3, 8], dim=1), Test(self=(7, 10, 10), sizes=[10], dim=1), Test(self=(7, 6, 10), sizes=[1, 1, 1, 1, 1, 1], dim=1), Test(self=(10, 10), sizes=[1, 2, 3, 4], dim=0), @@ -937,8 +943,11 @@ def get_split_with_sizes_inputs(): Test(self=(10, 7, 10, 10), sizes=[3, 6, 1], dim=0), Test(self=(10, 7, 10, 10), sizes=[10], dim=0), # Split on Channel + Test(self=(7, 13, 4, 8), sizes=[3, 5, 2, 3], dim=1), Test(self=(7, 13, 4, 8), sizes=[3, 6, 1, 3], dim=1), + Test(self=(7, 13, 4, 8), sizes=[3, 2, 2, 5, 1], dim=1), Test(self=(7, 13, 4, 8), sizes=[3, 3, 3, 3, 1], dim=1), + Test(self=(13, 4, 8), sizes=[3, 5, 2, 1, 2], dim=0), Test(self=(13, 4, 8), sizes=[3, 3, 3, 3, 1], dim=0), Test(self=(13, 4, 8), sizes=[2, 9, 2], dim=0), Test(self=(13, 4, 8), sizes=[13], dim=0), @@ -946,6 +955,8 @@ def get_split_with_sizes_inputs(): test_suite = VkTestSuite([tuple(tc) for tc in test_cases]) 
test_suite.layouts = [ + "utils::kWidthPacked", + "utils::kHeightPacked", "utils::kChannelsPacked", ] test_suite.data_gen = "make_seq_tensor" @@ -997,6 +1008,8 @@ def get_split_tensor_inputs(): ) test_suite.layouts = [ + "utils::kWidthPacked", + "utils::kHeightPacked", "utils::kChannelsPacked", ] test_suite.data_gen = "make_seq_tensor"