
Commit 4b0ed91

metascroy authored and facebook-github-bot committed
create et_view primop (#2553)
Summary:
Pull Request resolved: #2553

Implements a new view prim op kernel.

bypass-github-export-checks

Reviewed By: larryliu0820, cbilgin

Differential Revision: D55099757

fbshipit-source-id: 92e44621f4d9b38ad6ecb2610cce4b765e650029
1 parent 6c3daa0 commit 4b0ed91
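
For context, et_view gives ExecuTorch a prim-op implementation of view semantics: the output tensor must hold the same number of elements as the input, at most one entry of the requested size may be -1 (and is inferred from the remaining entries), and the output aliases the input's storage rather than copying it. The sketch below is illustrative only: a plain-C++ restatement of the size-inference rule implemented by get_view_target_size() in kernels/prim_ops/et_view.cpp further down, not ExecuTorch code.

#include <cstdint>
#include <vector>

// Illustrative sketch: infer a single -1 entry in a requested view size,
// following the same rules as get_view_target_size() in this commit.
// `numel` is the element count of the input tensor.
bool infer_view_size(int64_t numel, std::vector<int64_t>& size) {
  int inferred_dim = -1;
  int64_t known_product = 1;
  for (size_t i = 0; i < size.size(); ++i) {
    if (size[i] == -1) {
      if (inferred_dim != -1) {
        return false; // at most one view dim can be -1
      }
      inferred_dim = static_cast<int>(i);
    } else if (size[i] < 0) {
      return false; // other negative sizes are not allowed
    } else {
      known_product *= size[i];
    }
  }
  if (inferred_dim >= 0) {
    if (known_product == 0) {
      return false; // cannot infer a dim alongside a zero-sized dim
    }
    size[inferred_dim] = numel / known_product;
  }
  return true;
}

For example, with numel = 6 and a requested size of {1, 3, -1}, the inferred size is {1, 3, 2}; the kernel then checks that the output's element count matches the input's and makes the output share the input's data pointer.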

File tree

6 files changed: +333 −4 lines


examples/selective_build/test_selective_build.sh

Lines changed: 4 additions & 4 deletions
@@ -32,9 +32,9 @@ test_buck2_select_ops_in_list() {
     ${PYTHON_EXECUTABLE} -m examples.portable.scripts.export --model_name="add_mul"

     echo "Running selective build test"
-    # set max_kernel_num=17: 14 primops, add, mul
+    # set max_kernel_num=18: 16 primops, add, mul
     $BUCK run //examples/selective_build:selective_build_test \
-        --config=executorch.max_kernel_num=17 \
+        --config=executorch.max_kernel_num=18 \
         --config=executorch.select_ops=list \
         -- --model_path=./add_mul.pte

@@ -100,11 +100,11 @@ test_cmake_select_ops_in_list() {

     local example_dir=examples/selective_build
     local build_dir=cmake-out/${example_dir}
-    # set MAX_KERNEL_NUM=17: 14 primops, add, mul
+    # set MAX_KERNEL_NUM=18: 16 primops, add, mul
     rm -rf ${build_dir}
     retry cmake -DBUCK2="$BUCK" \
         -DCMAKE_BUILD_TYPE=Release \
-        -DMAX_KERNEL_NUM=17 \
+        -DMAX_KERNEL_NUM=18 \
         -DEXECUTORCH_SELECT_OPS_LIST="aten::convolution.out,\
 aten::_native_batch_norm_legit_no_training.out,aten::hardtanh.out,aten::add.out,\
 aten::mean.out,aten::view_copy.out,aten::permute_copy.out,aten::addmm.out,\

kernels/prim_ops/et_view.cpp

Lines changed: 112 additions & 0 deletions
@@ -0,0 +1,112 @@ (new file)

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/kernels/prim_ops/et_view.h>

#include <cstring>

#include <executorch/runtime/core/array_ref.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
#include <executorch/runtime/platform/assert.h>

using exec_aten::SizesType;
using exec_aten::Tensor;
using torch::executor::Error;
using torch::executor::resize_tensor;

namespace torch {
namespace executor {
namespace function {

constexpr size_t kTensorDimensionLimit = 16;

namespace {
bool get_view_target_size(
    const exec_aten::Tensor self,
    exec_aten::ArrayRef<int64_t> size,
    int64_t dim,
    exec_aten::SizesType* out_size) {
  ET_LOG_AND_RETURN_IF_FALSE(size.size() == dim);
  int minus1_dim = -1;
  int n_zero = 0;
  int64_t numel_without_minus_1 = 1;
  for (int i = 0; i < dim; i++) {
    if (size[i] == -1) {
      ET_LOG_MSG_AND_RETURN_IF_FALSE(
          minus1_dim == -1, "At most one view dim can be -1.");
      minus1_dim = i;
    } else {
      // The size[i] must be non-negative now, but we check size[i] >= -1
      // in case code is reordered in the future.
      ET_LOG_MSG_AND_RETURN_IF_FALSE(
          size[i] >= -1, "Negative sizes are not allowed.");

      numel_without_minus_1 *= size[i];
      out_size[i] = static_cast<exec_aten::SizesType>(size[i]);

      if (size[i] == 0) {
        n_zero++;
      }
    }
  }
  if (minus1_dim >= 0) {
    ET_LOG_MSG_AND_RETURN_IF_FALSE(
        n_zero == 0, "Cannot infer dimension size if there is a zero dim.");
    out_size[minus1_dim] = self.numel() / numel_without_minus_1;
  }
  return true;
}
} // namespace

void et_view(RuntimeContext& context, EValue** stack) {
  (void)context;

  auto self = (*stack[0]).toTensor();
  auto size = (*stack[1]).toIntList();
  auto out = (*stack[2]).toTensor();

  ET_CHECK(tensors_have_same_dtype(self, out));

  // Compute output size
  SizesType expected_output_size[kTensorDimensionLimit];
  ET_CHECK(get_view_target_size(self, size, out.dim(), expected_output_size));

  // Resize for dynamic shape
  ET_CHECK_MSG(
      resize_tensor(
          out, {expected_output_size, static_cast<size_t>(out.dim())}) ==
          Error::Ok,
      "Failed to resize output tensor.");

  // Do some checks
  ET_CHECK(self.numel() == out.numel());

  // If out has a data_ptr, it must match self
  // We hit this path for memory-planned tensors
  if (out.const_data_ptr() != nullptr) {
    ET_CHECK_MSG(
        self.const_data_ptr() == out.const_data_ptr(),
        "out has a non-null data_ptr, but it does not equal self's data_ptr.");

    // nothing else to do
    return;
  }

  // out.const_data_ptr() == nullptr now
  ET_CHECK_MSG(
      internal::set_tensor_data(
          out,
          /*buffer=*/self.mutable_data_ptr(),
          /*buffer_size=*/out.nbytes()) == Error::Ok,
      "Failed to set data_ptr for out to self.");
}

} // namespace function
} // namespace executor
} // namespace torch
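
The kernel uses the standard prim-op calling convention: stack[0] is the input tensor, stack[1] the requested sizes as a boxed int list, and stack[2] the out tensor, which ends up sharing the input's data pointer. Below is a minimal call-site sketch, modeled on the tests added in kernels/prim_ops/test/prim_ops_test.cpp further down; `self`, `out`, and `context` are assumed to be set up as in those tests.

// Sketch only, assuming `self`, `out`, and `context` exist as in the new tests.
auto self_evalue = EValue(self);
auto out_evalue = EValue(out);

// Build the requested size {1, 3, -1} as a boxed int list.
int64_t size[3] = {1, 3, -1};
EValue size_vals[3] = {EValue(size[0]), EValue(size[1]), EValue(size[2])};
EValue* wrapped_vals[3] = {&size_vals[0], &size_vals[1], &size_vals[2]};
int64_t unwrapped_vals[3] = {0, 0, 0};
EValue size_list = EValue(
    BoxedEvalueList<int64_t>(wrapped_vals, unwrapped_vals, 3));

// Dispatch through the operator registry.
EValue* stack[3] = {&self_evalue, &size_list, &out_evalue};
getOpsFn("executorch_prim::et_view.default")(context, stack);
// On success, out has the requested shape and aliases self's storage:
// out.const_data_ptr() == self.const_data_ptr().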

kernels/prim_ops/et_view.h

Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@ (new file)

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <executorch/runtime/core/evalue.h>
#include <executorch/runtime/kernel/kernel_runtime_context.h>

namespace torch {
namespace executor {
namespace function {

void et_view(RuntimeContext& context, EValue** stack);

} // namespace function
} // namespace executor
} // namespace torch

kernels/prim_ops/register_prim_ops.cpp

Lines changed: 3 additions & 0 deletions
@@ -7,6 +7,7 @@
 */

 #include <executorch/kernels/prim_ops/et_copy_index.h>
+#include <executorch/kernels/prim_ops/et_view.h>
 #include <executorch/runtime/core/evalue.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <executorch/runtime/kernel/operator_registry.h>

@@ -240,6 +241,8 @@ static Kernel prim_ops[] = {

     // executorch_prim::et_copy_index.tensor(tensor, tensor) -> tensor
     Kernel("executorch_prim::et_copy_index.tensor", &et_copy_index),
+    // executorch_prim::et_view.default(Tensor, int[]) -> Tensor
+    Kernel("executorch_prim::et_view.default", &et_view),

 };
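
With this entry in the static prim_ops table, the new op becomes discoverable through the operator registry at runtime. A minimal sketch of checking for it before dispatching, following the pattern used by the tests added in this commit; `context` and `stack` ({self, size int-list, out}) are assumed to be set up as in the call-site sketch above.

// Sketch: verify the prim op is registered, then dispatch it.
if (hasOpsFn("executorch_prim::et_view.default")) {
  getOpsFn("executorch_prim::et_view.default")(context, stack);
}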

kernels/prim_ops/targets.bzl

Lines changed: 15 additions & 0 deletions
@@ -24,6 +24,20 @@ def define_common_targets():
         ],
     )

+    runtime.cxx_library(
+        name = "et_view" + aten_suffix,
+        srcs = ["et_view.cpp"],
+        visibility = [], # Private
+        exported_headers = ["et_view.h"],
+        deps = [
+            "//executorch/runtime/kernel:kernel_includes" + aten_suffix,
+        ],
+        exported_deps = [
+            "//executorch/runtime/core:evalue" + aten_suffix,
+            "//executorch/runtime/kernel:kernel_runtime_context" + aten_suffix,
+        ],
+    )
+
     runtime.cxx_library(
         name = "prim_ops_registry" + aten_suffix,
         srcs = ["register_prim_ops.cpp"],

@@ -37,6 +51,7 @@ def define_common_targets():
         compiler_flags = ["-Wno-global-constructors"],
         deps = [
             ":et_copy_index" + aten_suffix,
+            ":et_view" + aten_suffix,
             "//executorch/runtime/core:evalue" + aten_suffix,
             "//executorch/runtime/kernel:operator_registry",
             "//executorch/runtime/kernel:kernel_includes" + aten_suffix,

kernels/prim_ops/test/prim_ops_test.cpp

Lines changed: 177 additions & 0 deletions
@@ -17,6 +17,8 @@
 #include <executorch/runtime/kernel/operator_registry.h>
 #include <executorch/runtime/platform/runtime.h>
 #include <executorch/test/utils/DeathTest.h>
+#include <cstdint>
+#include <cstdio>

 using exec_aten::SizesType;
 using torch::executor::Error;

@@ -272,5 +274,180 @@ TEST_F(RegisterPrimOpsTest, LocalScalarDenseReturnsCorrectValue) {
   EXPECT_EQ(stack[1]->toInt(), expected);
 }

+TEST_F(RegisterPrimOpsTest, TestETView) {
+  EXPECT_TRUE(hasOpsFn("executorch_prim::et_view.default"));
+
+  testing::TensorFactory<ScalarType::Int> tf;
+
+  // *************************************************************************
+  // Make self for tests
+  // *************************************************************************
+  auto self = tf.make({3, 2}, {1, 2, 3, 4, 5, 6});
+  auto self_evalue = EValue(self);
+
+  // *************************************************************************
+  // Make size for tests
+  // *************************************************************************
+  int64_t size[3] = {1, 3, -1};
+  EValue size_as_evals[3] = {EValue(size[0]), EValue(size[1]), EValue(size[2])};
+  EValue* size_wrapped_vals[3] = {
+      &size_as_evals[0], &size_as_evals[1], &size_as_evals[2]};
+  int64_t size_unwrapped_vals[3] = {0, 0, 0};
+  EValue size_int_list_evalue = EValue(
+      BoxedEvalueList<int64_t>(size_wrapped_vals, size_unwrapped_vals, 3));
+
+  int64_t bad_size1[3] = {-1, 3, -1}; // two inferred dimensions
+  EValue bad_size_as_evals1[3] = {
+      EValue(bad_size1[0]), EValue(bad_size1[1]), EValue(bad_size1[2])};
+  EValue* bad_size_wrapped_vals1[3] = {
+      &bad_size_as_evals1[0], &bad_size_as_evals1[1], &bad_size_as_evals1[2]};
+  int64_t bad_size_unwrapped_vals1[3] = {0, 0, 0};
+  EValue bad_size_int_list_evalue1 = EValue(BoxedEvalueList<int64_t>(
+      bad_size_wrapped_vals1, bad_size_unwrapped_vals1, 3));
+
+  int64_t bad_size2[3] = {-2, -3, 1}; // negative size not supported
+  EValue bad_size_as_evals2[3] = {
+      EValue(bad_size2[0]), EValue(bad_size2[1]), EValue(bad_size2[2])};
+  EValue* bad_size_wrapped_vals2[3] = {
+      &bad_size_as_evals2[0], &bad_size_as_evals2[1], &bad_size_as_evals2[2]};
+  int64_t bad_size_unwrapped_vals2[3] = {0, 0, 0};
+  EValue bad_size_int_list_evalue2 = EValue(BoxedEvalueList<int64_t>(
+      bad_size_wrapped_vals2, bad_size_unwrapped_vals2, 3));
+
+  // *************************************************************************
+  // Make outs for tests
+  // *************************************************************************
+  constexpr int N_GOOD_OUTS = 2;
+  Tensor good_outs[N_GOOD_OUTS] = {
+      tf.ones({1, 3, 2}), // correct size with nullptr
+      tf.ones({1, 3, 2}), // correct size with self data_ptr
+  };
+  internal::reset_data_ptr(good_outs[0]);
+  ET_CHECK(
+      internal::set_tensor_data(
+          good_outs[1], self.mutable_data_ptr(), good_outs[1].nbytes()) ==
+      Error::Ok);
+  EValue good_out_evalues[N_GOOD_OUTS] = {
+      EValue(good_outs[0]), EValue(good_outs[1])};
+
+  // bad outs expect death
+  constexpr int N_BAD_OUTS = 3;
+  Tensor bad_outs[N_BAD_OUTS] = {
+      tf.ones({1, 3, 2, 1}), // wrong rank
+      tf.ones({1, 3, 3}), // wrong size
+      tf.ones({1, 3, 2}) // occupied data_ptr
+  };
+  EValue bad_out_evalues[N_BAD_OUTS] = {
+      EValue(bad_outs[0]), EValue(bad_outs[1]), EValue(bad_outs[2])};
+
+  // *************************************************************************
+  // Run tests
+  // *************************************************************************
+
+  constexpr int N_BAD_STACKS = N_BAD_OUTS + 2;
+  EValue* bad_stacks[N_BAD_STACKS][3] = {
+      // Bad out stacks
+      {&self_evalue, &size_int_list_evalue, &bad_out_evalues[0]},
+      {&self_evalue, &size_int_list_evalue, &bad_out_evalues[1]},
+      {&self_evalue, &size_int_list_evalue, &bad_out_evalues[2]},
+      // Bad size stacks
+      {&self_evalue, &bad_size_int_list_evalue1, &good_out_evalues[0]},
+      {&self_evalue, &bad_size_int_list_evalue2, &good_out_evalues[0]}};
+
+  // Bad stacks expect death
+  for (int i = 0; i < N_BAD_STACKS; i++) {
+    ET_EXPECT_DEATH(
+        getOpsFn("executorch_prim::et_view.default")(context, bad_stacks[i]),
+        "");
+  }
+
+  constexpr int N_GOOD_STACKS = N_GOOD_OUTS;
+  EValue* good_out_stacks[N_GOOD_STACKS][3] = {
+      {&self_evalue, &size_int_list_evalue, &good_out_evalues[0]},
+      {&self_evalue, &size_int_list_evalue, &good_out_evalues[1]}};
+
+  // Good outs expect no death and correct output
+  for (int i = 0; i < N_GOOD_STACKS; i++) {
+    getOpsFn("executorch_prim::et_view.default")(context, good_out_stacks[i]);
+    EXPECT_TENSOR_EQ(good_outs[i], tf.make({1, 3, 2}, {1, 2, 3, 4, 5, 6}));
+    EXPECT_EQ(good_outs[i].const_data_ptr(), self.const_data_ptr());
+  }
+}
+
+TEST_F(RegisterPrimOpsTest, TestETViewDynamic) {
+  testing::TensorFactory<ScalarType::Int> tf;
+
+  auto self = tf.make({3, 1}, {1, 2, 3});
+  auto self_evalue = EValue(self);
+
+  int64_t size[3] = {1, 3, -1}; // inferred size should be {1, 3, 1}
+  // Construct the size as an EValue int_list
+  EValue size_as_evals[3] = {EValue(size[0]), EValue(size[1]), EValue(size[2])};
+  EValue* size_wrapped_vals[3] = {
+      &size_as_evals[0], &size_as_evals[1], &size_as_evals[2]};
+  int64_t size_unwrapped_vals[3] = {0, 0, 0};
+  EValue size_int_list_evalue = EValue(
+      BoxedEvalueList<int64_t>(size_wrapped_vals, size_unwrapped_vals, 3));
+
+#ifdef USE_ATEN_LIB
+  // ATen mode tensors don't need dynamism specification.
+  auto out = tf.make({3, 2, 1}, {0, 0, 0, 0, 0, 0});
+#else
+  auto out = tf.make(
+      {3, 2, 1}, {0, 0, 0, 0, 0, 0}, {}, TensorShapeDynamism::DYNAMIC_BOUND);
+#endif
+
+  internal::reset_data_ptr(out);
+  EValue out_evalue = EValue(out);
+
+  EValue* stack[3] = {&self_evalue, &size_int_list_evalue, &out_evalue};
+
+  getOpsFn("executorch_prim::et_view.default")(context, stack);
+
+  EXPECT_TENSOR_EQ(out, tf.make({1, 3, 1}, {1, 2, 3}));
+  EXPECT_EQ(out.const_data_ptr(), self.const_data_ptr());
+}
+
+TEST_F(RegisterPrimOpsTest, TestETViewEmpty) {
+  testing::TensorFactory<ScalarType::Int> tf;
+
+  auto self = tf.make({3, 1, 0}, {});
+  auto self_evalue = EValue(self);
+  EXPECT_EQ(self.const_data_ptr(), nullptr); // empty tensor has null data
+
+  // Construct the sizes
+  int64_t size[3] = {3, 1, -1};
+  EValue size_as_evals[3] = {EValue(size[0]), EValue(size[1]), EValue(size[2])};
+  EValue* size_wrapped_vals[3] = {
+      &size_as_evals[0], &size_as_evals[1], &size_as_evals[2]};
+  int64_t size_unwrapped_vals[3] = {0, 0, 0};
+  EValue size_int_list_evalue = EValue(
+      BoxedEvalueList<int64_t>(size_wrapped_vals, size_unwrapped_vals, 3));
+
+  int64_t bad_size[3] = {0, 1, -1}; // bad size: cannot infer with 0
+  EValue bad_size_as_evals[3] = {
+      EValue(bad_size[0]), EValue(bad_size[1]), EValue(bad_size[2])};
+  EValue* bad_size_wrapped_vals[3] = {
+      &bad_size_as_evals[0], &bad_size_as_evals[1], &bad_size_as_evals[2]};
+  int64_t bad_size_unwrapped_vals[3] = {0, 0, 0};
+  EValue bad_size_int_list_evalue = EValue(BoxedEvalueList<int64_t>(
+      bad_size_wrapped_vals, bad_size_unwrapped_vals, 3));
+
+  auto out = tf.make({3, 1, 0}, {}, {});
+  EValue out_evalue = EValue(out);
+  EXPECT_EQ(out.const_data_ptr(), nullptr);
+
+  // good size test
+  EValue* stack[3] = {&self_evalue, &size_int_list_evalue, &out_evalue};
+  getOpsFn("executorch_prim::et_view.default")(context, stack);
+  EXPECT_TENSOR_EQ(out, tf.make({3, 1, 0}, {}));
+  EXPECT_EQ(out.const_data_ptr(), self.const_data_ptr());
+
+  // bad size test
+  EValue* bad_stack[3] = {&self_evalue, &bad_size_int_list_evalue, &out_evalue};
+  ET_EXPECT_DEATH(
+      getOpsFn("executorch_prim::et_view.default")(context, bad_stack), "");
+}
+
 } // namespace executor
 } // namespace torch

0 commit comments
