diff --git a/kernels/aten/functions.yaml b/kernels/aten/functions.yaml
index 463ef0f9d32..5f4b297cdde 100644
--- a/kernels/aten/functions.yaml
+++ b/kernels/aten/functions.yaml
@@ -403,6 +403,8 @@
 - op: unbind_copy.int_out
 
+- op: unfold_copy.out
+
 - op: unsafe_split.Tensor_out
 
 - op: unsqueeze_copy.dim_out
 
diff --git a/kernels/portable/cpu/op_unfold_copy.cpp b/kernels/portable/cpu/op_unfold_copy.cpp
new file mode 100644
index 00000000000..69ddb3368d7
--- /dev/null
+++ b/kernels/portable/cpu/op_unfold_copy.cpp
@@ -0,0 +1,73 @@
+#include <c10/util/irange.h>
+#include <cstdint>
+#include <cstring>
+#include <executorch/kernels/portable/cpu/util/copy_ops_util.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+namespace torch {
+namespace executor {
+namespace native {
+
+using Tensor = executorch::aten::Tensor;
+
+// unfold_copy(Tensor self, int dimension, int size, int step, *, Tensor(a!)
+// out) -> Tensor(a!)
+Tensor unfold_copy_out(
+    KernelRuntimeContext& ctx,
+    const Tensor& self,
+    int64_t dim,
+    int64_t size,
+    int64_t step,
+    Tensor& out) {
+  (void)ctx;
+  // Check if dimension is valid
+  ET_KERNEL_CHECK(
+      ctx, check_unfold_copy_args(self, dim, size, step), InvalidArgument, out);
+  if (dim < 0) {
+    dim += nonzero_dim(self);
+  }
+  // Calculate output size
+  // @lint-ignore CLANGTIDY facebook-hte-CArray
+  Tensor::SizesType expected_output_size[kTensorDimensionLimit];
+  size_t expected_out_dim = 0;
+
+  get_unfold_copy_out_target_size(
+      self, dim, size, step, expected_output_size, &expected_out_dim);
+
+  ET_KERNEL_CHECK(
+      ctx,
+      resize_tensor(out, {expected_output_size, expected_out_dim}) == Error::Ok,
+      InvalidArgument,
+      out);
+
+  // Copy data
+  const size_t leading_dims = getLeadingDims(self, dim);
+  const size_t trailing_dims = getTrailingDims(self, dim);
+  ScalarType in_type = self.scalar_type();
+  ScalarType out_type = out.scalar_type();
+
+  ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, "unfold_copy.out", CTYPE_IN, [&]() {
+    const CTYPE_IN* input_ptr = self.const_data_ptr<CTYPE_IN>();
+    ET_SWITCH_REALHBBF16_TYPES(
+        out_type, ctx, "unfold_copy.out", CTYPE_OUT, [&] {
+          CTYPE_OUT* out_ptr = out.mutable_data_ptr<CTYPE_OUT>();
+          for (const auto i : c10::irange(leading_dims)) {
+            const CTYPE_IN* src =
+                input_ptr + i * self.size(dim) * trailing_dims;
+            for (const auto j : c10::irange(out.size(dim))) {
+              const CTYPE_IN* dim_src = src + j * step * trailing_dims;
+              for (const auto k : c10::irange(trailing_dims)) {
+                for (const auto l : c10::irange(size)) {
+                  *out_ptr = convert<CTYPE_OUT, CTYPE_IN>(
+                      dim_src[k + l * trailing_dims]);
+                  out_ptr++;
+                }
+              }
+            }
+          }
+        });
+  });
+  return out;
+}
+} // namespace native
+} // namespace executor
+} // namespace torch
diff --git a/kernels/portable/cpu/util/copy_ops_util.cpp b/kernels/portable/cpu/util/copy_ops_util.cpp
index bd01a1be329..229fba2dad0 100644
--- a/kernels/portable/cpu/util/copy_ops_util.cpp
+++ b/kernels/portable/cpu/util/copy_ops_util.cpp
@@ -964,5 +964,46 @@ void get_diagonal_copy_out_target_size(
   out_sizes[in.dim() - 2] = diagonal_size;
 }
 
+bool check_unfold_copy_args(
+    const Tensor& self,
+    int64_t dim,
+    int64_t size,
+    int64_t step) {
+  if (dim < 0) {
+    dim += nonzero_dim(self);
+  }
+  ET_LOG_AND_RETURN_IF_FALSE(tensor_has_dim(self, dim));
+  ET_CHECK_OR_RETURN_FALSE(
+      size >= 0, "size is %" PRId64 " but must be >= 0", size);
+  ET_CHECK_OR_RETURN_FALSE(
+      size <= self.size(dim),
+      "maximum size for tensor at dimension %" PRId64
+      " is %zd but size is %" PRId64,
+      dim,
+      self.size(dim),
+      size);
+  ET_CHECK_OR_RETURN_FALSE(
+      step > 0, "step is %" PRId64 " but must be > 0", step);
+  return true;
+}
+
+void get_unfold_copy_out_target_size(
+    const Tensor& self,
+    int64_t dim,
+    int64_t size,
+    int64_t step,
+    executorch::aten::SizesType* out_sizes,
+    size_t* out_ndim) {
+  for (auto i : c10::irange(self.dim())) {
+    out_sizes[i] = self.size(i);
+  }
+  // At `dim` dimension, we split the tensor into `size` chunks with `step`
+  // stride.
+  out_sizes[dim] = (self.size(dim) - size + step) / step;
+
+  out_sizes[self.dim()] = size;
+  *out_ndim = self.dim() + 1;
+}
+
 } // namespace executor
 } // namespace torch
diff --git a/kernels/portable/cpu/util/copy_ops_util.h b/kernels/portable/cpu/util/copy_ops_util.h
index e7399ae0956..edcc6eb0021 100644
--- a/kernels/portable/cpu/util/copy_ops_util.h
+++ b/kernels/portable/cpu/util/copy_ops_util.h
@@ -233,5 +233,19 @@ void get_diagonal_copy_out_target_size(
     executorch::aten::SizesType* out_sizes,
     size_t* out_ndim);
 
+bool check_unfold_copy_args(
+    const Tensor& self,
+    int64_t dim,
+    int64_t size,
+    int64_t step);
+
+void get_unfold_copy_out_target_size(
+    const Tensor& self,
+    int64_t dim,
+    int64_t size,
+    int64_t step,
+    executorch::aten::SizesType* out_sizes,
+    size_t* out_ndim);
+
 } // namespace executor
 } // namespace torch
diff --git a/kernels/portable/functions.yaml b/kernels/portable/functions.yaml
index 3221b8fe349..567d062d573 100644
--- a/kernels/portable/functions.yaml
+++ b/kernels/portable/functions.yaml
@@ -917,6 +917,11 @@
     - arg_meta: null
       kernel_name: torch::executor::unbind_copy_int_out
 
+- op: unfold_copy.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::unfold_copy_out
+
 - op: unsqueeze_copy.out
   kernels:
     - arg_meta: null
diff --git a/kernels/test/op_unfold_copy_test.cpp b/kernels/test/op_unfold_copy_test.cpp
new file mode 100644
index 00000000000..ef3c09c10e3
--- /dev/null
+++ b/kernels/test/op_unfold_copy_test.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/kernels/test/FunctionHeaderWrapper.h> // Declares the operator
+#include <executorch/kernels/test/TestUtil.h>
+#include <executorch/kernels/test/supported_features.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
+
+#include <gtest/gtest.h>
+
+#include <cstdint>
+
+using namespace ::testing;
+using executorch::aten::ScalarType;
+using executorch::aten::Tensor;
+using torch::executor::testing::TensorFactory;
+
+class OpUnfoldTest : public OperatorTest {
+ protected:
+  Tensor& op_unfold_copy_out(
+      const Tensor& self,
+      int64_t dim,
+      int64_t size,
+      int64_t step,
+      Tensor& out) {
+    return torch::executor::aten::unfold_copy_outf(
+        context_, self, dim, size, step, out);
+  }
+
+  template <class CTYPE, ScalarType DTYPE>
+  void test_unfold_copy_dtype() {
+    TensorFactory<DTYPE> tf;
+
+    auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
+    auto expected = tf.make({3, 2, 2}, {1, 2, 2, 3, 4, 5, 5, 6, 7, 8, 8, 9});
+    auto actual_out = tf.zeros_like(expected);
+    op_unfold_copy_out(input, /*dim=*/1, /*size=*/2, /*step=*/1, actual_out);
+    EXPECT_TENSOR_CLOSE(actual_out, expected);
+  }
+};
+
+TEST_F(OpUnfoldTest, SmokeTest) {
+  TensorFactory<ScalarType::Float> tf;
+  const auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
+  const auto expected = tf.make({3, 1, 2}, {1, 2, 4, 5, 7, 8});
+  auto output = tf.zeros_like(expected);
+
+  op_unfold_copy_out(input, /*dim=*/1, /*size=*/2, /*step=*/2, output);
+  EXPECT_TENSOR_CLOSE(output, expected);
+}
+
+TEST_F(OpUnfoldTest, DType) {
+#define TEST_ENTRY(ctype, dtype) \
+  test_unfold_copy_dtype<ctype, ScalarType::dtype>();
+  ET_FORALL_REALHBF16_TYPES(TEST_ENTRY);
+#undef TEST_ENTRY
+}
+
+TEST_F(OpUnfoldTest, ZeroDimension) {
+  TensorFactory<ScalarType::Int> tf;
+  const auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
+  const auto expected =
+      tf.make({2, 3, 2}, {1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 9});
+  auto output = tf.zeros_like(expected);
+
+  op_unfold_copy_out(input, /*dim=*/0, /*size=*/2, /*step=*/1, output);
+  EXPECT_TENSOR_CLOSE(output, expected);
+}
+
+TEST_F(OpUnfoldTest, NegativeDimension) {
+  TensorFactory<ScalarType::Float> tf;
+  const auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
+  const auto expected = tf.make({3, 1, 2}, {1, 2, 4, 5, 7, 8});
+  auto output = tf.zeros_like(expected);
+
+  op_unfold_copy_out(input, /*dim=*/-1, /*size=*/2, /*step=*/2, output);
+  EXPECT_TENSOR_CLOSE(output, expected);
+}
+
+TEST_F(OpUnfoldTest, LargeStep) {
+  TensorFactory<ScalarType::Float> tf;
+  const auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
+  const auto expected = tf.make({3, 1, 2}, {1, 2, 4, 5, 7, 8});
+  auto output = tf.zeros_like(expected);
+
+  op_unfold_copy_out(input, /*dim=*/-1, /*size=*/2, /*step=*/5, output);
+  EXPECT_TENSOR_CLOSE(output, expected);
+}
+
+TEST_F(OpUnfoldTest, ZeroSize) {
+  TensorFactory<ScalarType::Float> tf;
+  const auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
+  const auto expected = tf.make({3, 4, 0}, {});
+  auto output = tf.zeros_like(expected);
+
+  op_unfold_copy_out(input, /*dim=*/1, /*size=*/0, /*step=*/1, output);
+  EXPECT_TENSOR_CLOSE(output, expected);
+}
+
+TEST_F(OpUnfoldTest, NegativeSizeAndNegativeStepDies) {
+  TensorFactory<ScalarType::Float> tf;
+  const auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
+  auto output = tf.zeros({3, 1, 2});
+
+  ET_EXPECT_KERNEL_FAILURE(
+      context_,
+      op_unfold_copy_out(input, /*dim=*/1, /*size=*/-1, /*step=*/1, output));
+  ET_EXPECT_KERNEL_FAILURE(
+      context_,
+      op_unfold_copy_out(input, /*dim=*/1, /*size=*/1, /*step=*/-1, output));
+}
+
+TEST_F(OpUnfoldTest, InvalidDimAndSizeTooLargeDies) {
+  TensorFactory<ScalarType::Float> tf;
+  const auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
+  auto output = tf.zeros({3, 1, 2});
+  ET_EXPECT_KERNEL_FAILURE(
+      context_,
+      op_unfold_copy_out(input, /*dim=*/3, /*size=*/2, /*step=*/1, output));
+  ET_EXPECT_KERNEL_FAILURE(
+      context_,
+      op_unfold_copy_out(input, /*dim=*/1, /*size=*/10, /*step=*/1, output));
+}
diff --git a/kernels/test/targets.bzl b/kernels/test/targets.bzl
index 91f2121bebc..0d52a3d2d62 100644
--- a/kernels/test/targets.bzl
+++ b/kernels/test/targets.bzl
@@ -324,6 +324,7 @@ def define_common_targets():
     _common_op_test("op_tril_test", ["aten", "portable"])
     _common_op_test("op_trunc_test", ["aten", "portable"])
     _common_op_test("op_unbind_copy_test", ["aten", "portable"])
+    _common_op_test("op_unfold_copy_test", ["aten", "portable"])
     _common_op_test("op_unsqueeze_copy_test", ["aten", "portable"])
     _common_op_test("op_upsample_bilinear2d_test", ["aten", "portable"])
     _common_op_test("op_upsample_nearest2d_test", ["aten", "portable"])
diff --git a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
index f5ddae06b6a..8245f8d345f 100644
--- a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
+++ b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
@@ -1223,6 +1223,12 @@ ATEN_OPS = (
             "//executorch/kernels/portable/cpu/util:copy_ops_util",
         ],
     ),
+    op_target(
+        name = "op_unfold_copy",
+        deps = [
+            "//executorch/kernels/portable/cpu/util:copy_ops_util",
+        ],
+    ),
     op_target(
         name = "op_unsqueeze_copy",
         deps = [