
Commit 4b0ed91

metascroy authored and facebook-github-bot committed
create et_view primop (#2553)
Summary:
Pull Request resolved: #2553

Implements a new view prim op kernel.

bypass-github-export-checks

Reviewed By: larryliu0820, cbilgin

Differential Revision: D55099757

fbshipit-source-id: 92e44621f4d9b38ad6ecb2610cce4b765e650029
1 parent 6c3daa0 commit 4b0ed91
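
For context, et_view gives ExecuTorch a prim-op implementation of view semantics: the output tensor must hold the same number of elements as the input, at most one entry of the requested size may be -1 (and is inferred from the remaining entries), and the output aliases the input's storage rather than copying it. The sketch below is illustrative only: a plain-C++ restatement of the size-inference rule implemented by get_view_target_size() in kernels/prim_ops/et_view.cpp further down, not ExecuTorch code.

#include <cstdint>
#include <vector>

// Illustrative sketch: infer a single -1 entry in a requested view size,
// following the same rules as get_view_target_size() in this commit.
// `numel` is the element count of the input tensor.
bool infer_view_size(int64_t numel, std::vector<int64_t>& size) {
  int inferred_dim = -1;
  int64_t known_product = 1;
  for (size_t i = 0; i < size.size(); ++i) {
    if (size[i] == -1) {
      if (inferred_dim != -1) {
        return false; // at most one view dim can be -1
      }
      inferred_dim = static_cast<int>(i);
    } else if (size[i] < 0) {
      return false; // other negative sizes are not allowed
    } else {
      known_product *= size[i];
    }
  }
  if (inferred_dim >= 0) {
    if (known_product == 0) {
      return false; // cannot infer a dim alongside a zero-sized dim
    }
    size[inferred_dim] = numel / known_product;
  }
  return true;
}

For example, with numel = 6 and a requested size of {1, 3, -1}, the inferred size is {1, 3, 2}; the kernel then checks that the output's element count matches the input's and makes the output share the input's data pointer.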

File tree

6 files changed: +333 −4 lines


examples/selective_build/test_selective_build.sh

Lines changed: 4 additions & 4 deletions
@@ -32,9 +32,9 @@ test_buck2_select_ops_in_list() {
     ${PYTHON_EXECUTABLE} -m examples.portable.scripts.export --model_name="add_mul"

     echo "Running selective build test"
-    # set max_kernel_num=17: 14 primops, add, mul
+    # set max_kernel_num=18: 16 primops, add, mul
     $BUCK run //examples/selective_build:selective_build_test \
-        --config=executorch.max_kernel_num=17 \
+        --config=executorch.max_kernel_num=18 \
         --config=executorch.select_ops=list \
         -- --model_path=./add_mul.pte

@@ -100,11 +100,11 @@ test_cmake_select_ops_in_list() {

     local example_dir=examples/selective_build
     local build_dir=cmake-out/${example_dir}
-    # set MAX_KERNEL_NUM=17: 14 primops, add, mul
+    # set MAX_KERNEL_NUM=18: 16 primops, add, mul
     rm -rf ${build_dir}
     retry cmake -DBUCK2="$BUCK" \
         -DCMAKE_BUILD_TYPE=Release \
-        -DMAX_KERNEL_NUM=17 \
+        -DMAX_KERNEL_NUM=18 \
         -DEXECUTORCH_SELECT_OPS_LIST="aten::convolution.out,\
 aten::_native_batch_norm_legit_no_training.out,aten::hardtanh.out,aten::add.out,\
 aten::mean.out,aten::view_copy.out,aten::permute_copy.out,aten::addmm.out,\

kernels/prim_ops/et_view.cpp

Lines changed: 112 additions & 0 deletions
@@ -0,0 +1,112 @@ (new file)

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/kernels/prim_ops/et_view.h>

#include <cstring>

#include <executorch/runtime/core/array_ref.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
#include <executorch/runtime/platform/assert.h>

using exec_aten::SizesType;
using exec_aten::Tensor;
using torch::executor::Error;
using torch::executor::resize_tensor;

namespace torch {
namespace executor {
namespace function {

constexpr size_t kTensorDimensionLimit = 16;

namespace {
bool get_view_target_size(
    const exec_aten::Tensor self,
    exec_aten::ArrayRef<int64_t> size,
    int64_t dim,
    exec_aten::SizesType* out_size) {
  ET_LOG_AND_RETURN_IF_FALSE(size.size() == dim);
  int minus1_dim = -1;
  int n_zero = 0;
  int64_t numel_without_minus_1 = 1;
  for (int i = 0; i < dim; i++) {
    if (size[i] == -1) {
      ET_LOG_MSG_AND_RETURN_IF_FALSE(
          minus1_dim == -1, "At most one view dim can be -1.");
      minus1_dim = i;
    } else {
      // The size[i] must be non-negative now, but we check size[i] >= -1
      // in case code is reordered in the future.
      ET_LOG_MSG_AND_RETURN_IF_FALSE(
          size[i] >= -1, "Negative sizes are not allowed.");

      numel_without_minus_1 *= size[i];
      out_size[i] = static_cast<exec_aten::SizesType>(size[i]);

      if (size[i] == 0) {
        n_zero++;
      }
    }
  }
  if (minus1_dim >= 0) {
    ET_LOG_MSG_AND_RETURN_IF_FALSE(
        n_zero == 0, "Cannot infer dimension size if there is a zero dim.");
    out_size[minus1_dim] = self.numel() / numel_without_minus_1;
  }
  return true;
}
} // namespace

void et_view(RuntimeContext& context, EValue** stack) {
  (void)context;

  auto self = (*stack[0]).toTensor();
  auto size = (*stack[1]).toIntList();
  auto out = (*stack[2]).toTensor();

  ET_CHECK(tensors_have_same_dtype(self, out));

  // Compute output size
  SizesType expected_output_size[kTensorDimensionLimit];
  ET_CHECK(get_view_target_size(self, size, out.dim(), expected_output_size));

  // Resize for dynamic shape
  ET_CHECK_MSG(
      resize_tensor(
          out, {expected_output_size, static_cast<size_t>(out.dim())}) ==
          Error::Ok,
      "Failed to resize output tensor.");

  // Do some checks
  ET_CHECK(self.numel() == out.numel());

  // If out has a data_ptr, it must match self
  // We hit this path for memory-planned tensors
  if (out.const_data_ptr() != nullptr) {
    ET_CHECK_MSG(
        self.const_data_ptr() == out.const_data_ptr(),
        "out has a non-null data_ptr, but it does not equal self's data_ptr.");

    // nothing else to do
    return;
  }

  // out.const_data_ptr() == nullptr now
  ET_CHECK_MSG(
      internal::set_tensor_data(
          out,
          /*buffer=*/self.mutable_data_ptr(),
          /*buffer_size=*/out.nbytes()) == Error::Ok,
      "Failed to set data_ptr for out to self.");
}

} // namespace function
} // namespace executor
} // namespace torch
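
The kernel uses the standard prim-op calling convention: stack[0] is the input tensor, stack[1] the requested sizes as a boxed int list, and stack[2] the out tensor, which ends up sharing the input's data pointer. Below is a minimal call-site sketch, modeled on the tests added in kernels/prim_ops/test/prim_ops_test.cpp further down; `self`, `out`, and `context` are assumed to be set up as in those tests.

// Sketch only, assuming `self`, `out`, and `context` exist as in the new tests.
auto self_evalue = EValue(self);
auto out_evalue = EValue(out);

// Build the requested size {1, 3, -1} as a boxed int list.
int64_t size[3] = {1, 3, -1};
EValue size_vals[3] = {EValue(size[0]), EValue(size[1]), EValue(size[2])};
EValue* wrapped_vals[3] = {&size_vals[0], &size_vals[1], &size_vals[2]};
int64_t unwrapped_vals[3] = {0, 0, 0};
EValue size_list = EValue(
    BoxedEvalueList<int64_t>(wrapped_vals, unwrapped_vals, 3));

// Dispatch through the operator registry.
EValue* stack[3] = {&self_evalue, &size_list, &out_evalue};
getOpsFn("executorch_prim::et_view.default")(context, stack);
// On success, out has the requested shape and aliases self's storage:
// out.const_data_ptr() == self.const_data_ptr().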

kernels/prim_ops/et_view.h

Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@ (new file)

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <executorch/runtime/core/evalue.h>
#include <executorch/runtime/kernel/kernel_runtime_context.h>

namespace torch {
namespace executor {
namespace function {

void et_view(RuntimeContext& context, EValue** stack);

} // namespace function
} // namespace executor
} // namespace torch

kernels/prim_ops/register_prim_ops.cpp

Lines changed: 3 additions & 0 deletions
@@ -7,6 +7,7 @@
 */

 #include <executorch/kernels/prim_ops/et_copy_index.h>
+#include <executorch/kernels/prim_ops/et_view.h>
 #include <executorch/runtime/core/evalue.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <executorch/runtime/kernel/operator_registry.h>

@@ -240,6 +241,8 @@ static Kernel prim_ops[] = {

     // executorch_prim::et_copy_index.tensor(tensor, tensor) -> tensor
     Kernel("executorch_prim::et_copy_index.tensor", &et_copy_index),
+    // executorch_prim::et_view.default(Tensor, int[]) -> Tensor
+    Kernel("executorch_prim::et_view.default", &et_view),

 };
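
With this entry in the static prim_ops table, the new op becomes discoverable through the operator registry at runtime. A minimal sketch of checking for it before dispatching, following the pattern used by the tests added in this commit; `context` and `stack` ({self, size int-list, out}) are assumed to be set up as in the call-site sketch above.

// Sketch: verify the prim op is registered, then dispatch it.
if (hasOpsFn("executorch_prim::et_view.default")) {
  getOpsFn("executorch_prim::et_view.default")(context, stack);
}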

kernels/prim_ops/targets.bzl

Lines changed: 15 additions & 0 deletions
@@ -24,6 +24,20 @@ def define_common_targets():
         ],
     )

+    runtime.cxx_library(
+        name = "et_view" + aten_suffix,
+        srcs = ["et_view.cpp"],
+        visibility = [], # Private
+        exported_headers = ["et_view.h"],
+        deps = [
+            "//executorch/runtime/kernel:kernel_includes" + aten_suffix,
+        ],
+        exported_deps = [
+            "//executorch/runtime/core:evalue" + aten_suffix,
+            "//executorch/runtime/kernel:kernel_runtime_context" + aten_suffix,
+        ],
+    )
+
     runtime.cxx_library(
         name = "prim_ops_registry" + aten_suffix,
         srcs = ["register_prim_ops.cpp"],

@@ -37,6 +51,7 @@ def define_common_targets():
         compiler_flags = ["-Wno-global-constructors"],
         deps = [
             ":et_copy_index" + aten_suffix,
+            ":et_view" + aten_suffix,
             "//executorch/runtime/core:evalue" + aten_suffix,
             "//executorch/runtime/kernel:operator_registry",
             "//executorch/runtime/kernel:kernel_includes" + aten_suffix,

kernels/prim_ops/test/prim_ops_test.cpp

Lines changed: 177 additions & 0 deletions
@@ -17,6 +17,8 @@
 #include <executorch/runtime/kernel/operator_registry.h>
 #include <executorch/runtime/platform/runtime.h>
 #include <executorch/test/utils/DeathTest.h>
+#include <cstdint>
+#include <cstdio>

 using exec_aten::SizesType;
 using torch::executor::Error;

@@ -272,5 +274,180 @@ TEST_F(RegisterPrimOpsTest, LocalScalarDenseReturnsCorrectValue) {
   EXPECT_EQ(stack[1]->toInt(), expected);
 }

+TEST_F(RegisterPrimOpsTest, TestETView) {
+  EXPECT_TRUE(hasOpsFn("executorch_prim::et_view.default"));
+
+  testing::TensorFactory<ScalarType::Int> tf;
+
+  // *************************************************************************
+  // Make self for tests
+  // *************************************************************************
+  auto self = tf.make({3, 2}, {1, 2, 3, 4, 5, 6});
+  auto self_evalue = EValue(self);
+
+  // *************************************************************************
+  // Make size for tests
+  // *************************************************************************
+  int64_t size[3] = {1, 3, -1};
+  EValue size_as_evals[3] = {EValue(size[0]), EValue(size[1]), EValue(size[2])};
+  EValue* size_wrapped_vals[3] = {
+      &size_as_evals[0], &size_as_evals[1], &size_as_evals[2]};
+  int64_t size_unwrapped_vals[3] = {0, 0, 0};
+  EValue size_int_list_evalue = EValue(
+      BoxedEvalueList<int64_t>(size_wrapped_vals, size_unwrapped_vals, 3));
+
+  int64_t bad_size1[3] = {-1, 3, -1}; // two inferred dimensions
+  EValue bad_size_as_evals1[3] = {
+      EValue(bad_size1[0]), EValue(bad_size1[1]), EValue(bad_size1[2])};
+  EValue* bad_size_wrapped_vals1[3] = {
+      &bad_size_as_evals1[0], &bad_size_as_evals1[1], &bad_size_as_evals1[2]};
+  int64_t bad_size_unwrapped_vals1[3] = {0, 0, 0};
+  EValue bad_size_int_list_evalue1 = EValue(BoxedEvalueList<int64_t>(
+      bad_size_wrapped_vals1, bad_size_unwrapped_vals1, 3));
+
+  int64_t bad_size2[3] = {-2, -3, 1}; // negative size not supported
+  EValue bad_size_as_evals2[3] = {
+      EValue(bad_size2[0]), EValue(bad_size2[1]), EValue(bad_size2[2])};
+  EValue* bad_size_wrapped_vals2[3] = {
+      &bad_size_as_evals2[0], &bad_size_as_evals2[1], &bad_size_as_evals2[2]};
+  int64_t bad_size_unwrapped_vals2[3] = {0, 0, 0};
+  EValue bad_size_int_list_evalue2 = EValue(BoxedEvalueList<int64_t>(
+      bad_size_wrapped_vals2, bad_size_unwrapped_vals2, 3));
+
+  // *************************************************************************
+  // Make outs for tests
+  // *************************************************************************
+  constexpr int N_GOOD_OUTS = 2;
+  Tensor good_outs[N_GOOD_OUTS] = {
+      tf.ones({1, 3, 2}), // correct size with nullptr
+      tf.ones({1, 3, 2}), // correct size with self data_ptr
+  };
+  internal::reset_data_ptr(good_outs[0]);
+  ET_CHECK(
+      internal::set_tensor_data(
+          good_outs[1], self.mutable_data_ptr(), good_outs[1].nbytes()) ==
+      Error::Ok);
+  EValue good_out_evalues[N_GOOD_OUTS] = {
+      EValue(good_outs[0]), EValue(good_outs[1])};
+
+  // bad outs expect death
+  constexpr int N_BAD_OUTS = 3;
+  Tensor bad_outs[N_BAD_OUTS] = {
+      tf.ones({1, 3, 2, 1}), // wrong rank
+      tf.ones({1, 3, 3}), // wrong size
+      tf.ones({1, 3, 2}) // occupied data_ptr
+  };
+  EValue bad_out_evalues[N_BAD_OUTS] = {
+      EValue(bad_outs[0]), EValue(bad_outs[1]), EValue(bad_outs[2])};
+
+  // *************************************************************************
+  // Run tests
+  // *************************************************************************
+
+  constexpr int N_BAD_STACKS = N_BAD_OUTS + 2;
+  EValue* bad_stacks[N_BAD_STACKS][3] = {
+      // Bad out stacks
+      {&self_evalue, &size_int_list_evalue, &bad_out_evalues[0]},
+      {&self_evalue, &size_int_list_evalue, &bad_out_evalues[1]},
+      {&self_evalue, &size_int_list_evalue, &bad_out_evalues[2]},
+      // Bad size stacks
+      {&self_evalue, &bad_size_int_list_evalue1, &good_out_evalues[0]},
+      {&self_evalue, &bad_size_int_list_evalue2, &good_out_evalues[0]}};
+
+  // Bad stacks expect death
+  for (int i = 0; i < N_BAD_STACKS; i++) {
+    ET_EXPECT_DEATH(
+        getOpsFn("executorch_prim::et_view.default")(context, bad_stacks[i]),
+        "");
+  }
+
+  constexpr int N_GOOD_STACKS = N_GOOD_OUTS;
+  EValue* good_out_stacks[N_GOOD_STACKS][3] = {
+      {&self_evalue, &size_int_list_evalue, &good_out_evalues[0]},
+      {&self_evalue, &size_int_list_evalue, &good_out_evalues[1]}};
+
+  // Good outs expect no death and correct output
+  for (int i = 0; i < N_GOOD_STACKS; i++) {
+    getOpsFn("executorch_prim::et_view.default")(context, good_out_stacks[i]);
+    EXPECT_TENSOR_EQ(good_outs[i], tf.make({1, 3, 2}, {1, 2, 3, 4, 5, 6}));
+    EXPECT_EQ(good_outs[i].const_data_ptr(), self.const_data_ptr());
+  }
+}
+
+TEST_F(RegisterPrimOpsTest, TestETViewDynamic) {
+  testing::TensorFactory<ScalarType::Int> tf;
+
+  auto self = tf.make({3, 1}, {1, 2, 3});
+  auto self_evalue = EValue(self);
+
+  int64_t size[3] = {1, 3, -1}; // inferred size should be {1, 3, 1}
+  // Construct the size as an EValue int_list
+  EValue size_as_evals[3] = {EValue(size[0]), EValue(size[1]), EValue(size[2])};
+  EValue* size_wrapped_vals[3] = {
+      &size_as_evals[0], &size_as_evals[1], &size_as_evals[2]};
+  int64_t size_unwrapped_vals[3] = {0, 0, 0};
+  EValue size_int_list_evalue = EValue(
+      BoxedEvalueList<int64_t>(size_wrapped_vals, size_unwrapped_vals, 3));
+
+#ifdef USE_ATEN_LIB
+  // ATen mode tensors don't need dynamism specification.
+  auto out = tf.make({3, 2, 1}, {0, 0, 0, 0, 0, 0});
+#else
+  auto out = tf.make(
+      {3, 2, 1}, {0, 0, 0, 0, 0, 0}, {}, TensorShapeDynamism::DYNAMIC_BOUND);
+#endif
+
+  internal::reset_data_ptr(out);
+  EValue out_evalue = EValue(out);
+
+  EValue* stack[3] = {&self_evalue, &size_int_list_evalue, &out_evalue};
+
+  getOpsFn("executorch_prim::et_view.default")(context, stack);
+
+  EXPECT_TENSOR_EQ(out, tf.make({1, 3, 1}, {1, 2, 3}));
+  EXPECT_EQ(out.const_data_ptr(), self.const_data_ptr());
+}
+
+TEST_F(RegisterPrimOpsTest, TestETViewEmpty) {
+  testing::TensorFactory<ScalarType::Int> tf;
+
+  auto self = tf.make({3, 1, 0}, {});
+  auto self_evalue = EValue(self);
+  EXPECT_EQ(self.const_data_ptr(), nullptr); // empty tensor has null data
+
+  // Construct the sizes
+  int64_t size[3] = {3, 1, -1};
+  EValue size_as_evals[3] = {EValue(size[0]), EValue(size[1]), EValue(size[2])};
+  EValue* size_wrapped_vals[3] = {
+      &size_as_evals[0], &size_as_evals[1], &size_as_evals[2]};
+  int64_t size_unwrapped_vals[3] = {0, 0, 0};
+  EValue size_int_list_evalue = EValue(
+      BoxedEvalueList<int64_t>(size_wrapped_vals, size_unwrapped_vals, 3));
+
+  int64_t bad_size[3] = {0, 1, -1}; // bad size: cannot infer with 0
+  EValue bad_size_as_evals[3] = {
+      EValue(bad_size[0]), EValue(bad_size[1]), EValue(bad_size[2])};
+  EValue* bad_size_wrapped_vals[3] = {
+      &bad_size_as_evals[0], &bad_size_as_evals[1], &bad_size_as_evals[2]};
+  int64_t bad_size_unwrapped_vals[3] = {0, 0, 0};
+  EValue bad_size_int_list_evalue = EValue(BoxedEvalueList<int64_t>(
+      bad_size_wrapped_vals, bad_size_unwrapped_vals, 3));
+
+  auto out = tf.make({3, 1, 0}, {}, {});
+  EValue out_evalue = EValue(out);
+  EXPECT_EQ(out.const_data_ptr(), nullptr);
+
+  // good size test
+  EValue* stack[3] = {&self_evalue, &size_int_list_evalue, &out_evalue};
+  getOpsFn("executorch_prim::et_view.default")(context, stack);
+  EXPECT_TENSOR_EQ(out, tf.make({3, 1, 0}, {}));
+  EXPECT_EQ(out.const_data_ptr(), self.const_data_ptr());
+
+  // bad size test
+  EValue* bad_stack[3] = {&self_evalue, &bad_size_int_list_evalue, &out_evalue};
+  ET_EXPECT_DEATH(
+      getOpsFn("executorch_prim::et_view.default")(context, bad_stack), "");
+}
+
 } // namespace executor
 } // namespace torch

0 commit comments
