Skip to content

Commit c6c3616

Browse files
Revert vectorization stack (#11536)
### Summary
These broke some internal tests. @swolchok said to just revert and he would try again.

### Test plan
CI
1 parent 2dda7a2 commit c6c3616

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

70 files changed

+328
-959
lines changed

.lintrunner.toml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -271,10 +271,6 @@ exclude_patterns = [
271271
'examples/**',
272272
'exir/verification/bindings.cpp',
273273
'extension/**',
274-
# Uses properly-gated (ET_USE_PYTORCH_HEADERS) ATen include.
275-
'kernels/portable/cpu/util/elementwise_util.h',
276-
'kernels/portable/cpu/util/math_util.h',
277-
'kernels/portable/cpu/util/vectorized_math.h',
278274
'kernels/optimized/**',
279275
'runtime/core/exec_aten/**',
280276
# Want to be able to keep c10 in sync with PyTorch core.

kernels/portable/CMakeLists.txt

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,8 @@ if(EXECUTORCH_BUILD_PTHREADPOOL AND EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
6969
target_compile_options(optimized_portable_kernels PUBLIC ${_common_compile_options})
7070
target_include_directories(optimized_portable_kernels PRIVATE ${TORCH_INCLUDE_DIRS})
7171
target_compile_definitions(optimized_portable_kernels PRIVATE ET_USE_PYTORCH_HEADERS)
72-
gen_selected_ops(LIB_NAME "optimized_portable_ops_lib" OPS_SCHEMA_YAML "${_yaml}")
73-
generate_bindings_for_kernels(
74-
LIB_NAME "optimized_portable_ops_lib" FUNCTIONS_YAML "${_yaml}"
75-
)
76-
gen_operators_lib(
77-
LIB_NAME "optimized_portable_ops_lib" KERNEL_LIBS optimized_portable_kernels DEPS executorch_core
78-
)
7972
install(
80-
TARGETS optimized_portable_kernels optimized_portable_ops_lib
73+
TARGETS optimized_portable_kernels
8174
DESTINATION lib
8275
)
8376
endif()

kernels/portable/cpu/op_acos.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ namespace executor {
1515
namespace native {
1616

1717
Tensor& acos_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
18-
static constexpr const char op_name[] = "acos.out";
19-
return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>(
20-
[](auto x) { return executorch::math::acos(x); }, ctx, in, out);
18+
return internal::unary_ufunc_realhbbf16_to_floathbf16(
19+
std::acos, ctx, in, out);
2120
}
2221

2322
} // namespace native

kernels/portable/cpu/op_acosh.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ namespace executor {
1515
namespace native {
1616

1717
Tensor& acosh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
18-
static constexpr const char op_name[] = "acosh.out";
19-
return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>(
20-
[](auto x) { return executorch::math::acosh(x); }, ctx, in, out);
18+
return internal::unary_ufunc_realhbbf16_to_floathbf16(
19+
std::acosh, ctx, in, out);
2120
}
2221

2322
} // namespace native

kernels/portable/cpu/op_add.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -102,18 +102,14 @@ Tensor& add_scalar_out(
102102
static constexpr const char op_name[] = "add.Scalar_out";
103103

104104
ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
105-
CTYPE_COMPUTE val_b = utils::scalar_to<CTYPE_COMPUTE>(b);
106-
CTYPE_COMPUTE val_alpha = utils::scalar_to<CTYPE_COMPUTE>(alpha);
107-
auto val_alpha_times_b = val_alpha * val_b;
108105
utils::apply_unitensor_elementwise_fn<
109106
CTYPE_COMPUTE,
110107
op_name,
111108
utils::SupportedTensorDtypes::SAME_AS_COMMON>(
112-
[val_alpha_times_b](const auto val_a) {
113-
// Cast here supports vectorization; either it does nothing
114-
// or it casts from CTYPE_COMPUTE to
115-
// Vectorized<CTYPE_COMPUTE>.
116-
return val_a + decltype(val_a)(val_alpha_times_b);
109+
[b, alpha](const auto val_a) {
110+
CTYPE_COMPUTE val_b = utils::scalar_to<CTYPE_COMPUTE>(b);
111+
CTYPE_COMPUTE val_alpha = utils::scalar_to<CTYPE_COMPUTE>(alpha);
112+
return val_a + val_alpha * val_b;
117113
},
118114
ctx,
119115
a,

kernels/portable/cpu/op_asin.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ namespace executor {
1515
namespace native {
1616

1717
Tensor& asin_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
18-
static constexpr const char op_name[] = "asin.out";
19-
return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>(
20-
[](auto x) { return executorch::math::asin(x); }, ctx, in, out);
18+
return internal::unary_ufunc_realhbbf16_to_floathbf16(
19+
std::asin, ctx, in, out);
2120
}
2221

2322
} // namespace native

kernels/portable/cpu/op_asinh.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ namespace executor {
1515
namespace native {
1616

1717
Tensor& asinh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
18-
static constexpr const char op_name[] = "asinh.out";
19-
return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>(
20-
[](auto x) { return executorch::math::asinh(x); }, ctx, in, out);
18+
return internal::unary_ufunc_realhbbf16_to_floathbf16(
19+
std::asinh, ctx, in, out);
2120
}
2221

2322
} // namespace native

kernels/portable/cpu/op_atan.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ namespace executor {
1515
namespace native {
1616

1717
Tensor& atan_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
18-
static constexpr const char op_name[] = "atan.out";
19-
return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>(
20-
[](auto x) { return executorch::math::atan(x); }, ctx, in, out);
18+
return internal::unary_ufunc_realhbbf16_to_floathbf16(
19+
std::atan, ctx, in, out);
2120
}
2221

2322
} // namespace native

kernels/portable/cpu/op_atan2.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ Tensor& atan2_out(
6060
op_name,
6161
utils::SupportedTensorDtypes::FLOATHBF16>(
6262
[](const auto val_a, const auto val_b) {
63-
return executorch::math::atan2(val_a, val_b);
63+
return std::atan2(val_a, val_b);
6464
},
6565
ctx,
6666
a,

kernels/portable/cpu/op_atanh.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ namespace executor {
1515
namespace native {
1616

1717
Tensor& atanh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
18-
static constexpr const char op_name[] = "atanh.out";
19-
return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>(
20-
[](auto x) { return executorch::math::atanh(x); }, ctx, in, out);
18+
return internal::unary_ufunc_realhbbf16_to_floathbf16(
19+
std::atanh, ctx, in, out);
2120
}
2221

2322
} // namespace native

kernels/portable/cpu/op_ceil.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@ namespace native {
1717
using executorch::aten::Tensor;
1818

1919
Tensor& ceil_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
20-
static constexpr const char op_name[] = "ceil.out";
21-
return internal::unary_ufunc_realhbf16<op_name>(
22-
[](auto x) { return executorch::math::ceil(x); }, ctx, in, out);
20+
return internal::unary_ufunc_realhbf16(std::ceil, ctx, in, out);
2321
}
2422

2523
} // namespace native

kernels/portable/cpu/op_clamp.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,9 @@ Tensor& clamp_out(
138138
CTYPE_COMPUTE,
139139
op_name,
140140
utils::SupportedTensorDtypes::SAME_AS_COMMON>(
141-
[has_min, min_opt, has_max, max_opt](const auto val_in) {
142-
auto val_out = val_in;
141+
[has_min, min_opt, has_max, max_opt](const CTYPE_COMPUTE val_in) {
142+
// TODO: rewrite this to be vectorization-capable.
143+
CTYPE_COMPUTE val_out = val_in;
143144
if (has_min) {
144145
val_out = utils::max_override(
145146
val_out, utils::scalar_to<CTYPE_COMPUTE>(min_opt.value()));

kernels/portable/cpu/op_cos.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@ namespace executor {
1515
namespace native {
1616

1717
Tensor& cos_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
18-
static constexpr const char op_name[] = "cos.out";
19-
return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>(
20-
[](auto x) { return executorch::math::cos(x); }, ctx, in, out);
18+
return internal::unary_ufunc_realhbbf16_to_floathbf16(std::cos, ctx, in, out);
2119
}
2220

2321
} // namespace native

kernels/portable/cpu/op_cosh.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ namespace executor {
1515
namespace native {
1616

1717
Tensor& cosh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
18-
static constexpr const char op_name[] = "cosh.out";
19-
return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>(
20-
[](auto x) { return executorch::math::cosh(x); }, ctx, in, out);
18+
return internal::unary_ufunc_realhbbf16_to_floathbf16(
19+
std::cosh, ctx, in, out);
2120
}
2221

2322
} // namespace native

kernels/portable/cpu/op_elu.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ Tensor& elu_out(
4848
CTYPE,
4949
op_name,
5050
utils::SupportedTensorDtypes::SAME_AS_COMMON>(
51-
[negcoef, math_scale, math_input_scale](const CTYPE x) {
51+
[negcoef, math_scale, math_input_scale](const auto x) {
52+
// TODO: rewrite this to be vectorization-capable.
5253
return MathT(x) <= MathT(0)
5354
? std::expm1(MathT(x) * math_input_scale) * negcoef
5455
: MathT(x) * math_scale;

kernels/portable/cpu/op_erf.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@ namespace executor {
1515
namespace native {
1616

1717
Tensor& erf_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
18-
static constexpr const char op_name[] = "erf.out";
19-
return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>(
20-
[](auto x) { return executorch::math::erf(x); }, ctx, in, out);
18+
return internal::unary_ufunc_realhbbf16_to_floathbf16(std::erf, ctx, in, out);
2119
}
2220

2321
} // namespace native

kernels/portable/cpu/op_exp.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@ namespace executor {
1515
namespace native {
1616

1717
Tensor& exp_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
18-
static constexpr const char op_name[] = "exp.out";
19-
return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>(
20-
[](auto x) { return executorch::math::exp(x); }, ctx, in, out);
18+
return internal::unary_ufunc_realhbbf16_to_floathbf16(std::exp, ctx, in, out);
2119
}
2220

2321
} // namespace native

kernels/portable/cpu/op_expm1.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,16 @@
77
*/
88

99
#include <executorch/kernels/portable/cpu/pattern/pattern.h>
10-
#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
1110
#include <executorch/runtime/kernel/kernel_includes.h>
1211
#include <cmath>
13-
#include <type_traits>
1412

1513
namespace torch {
1614
namespace executor {
1715
namespace native {
1816

1917
Tensor& expm1_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
20-
static constexpr const char op_name[] = "expm1.out";
21-
return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>(
22-
[](auto x) { return executorch::math::expm1(x); }, ctx, in, out);
18+
return internal::unary_ufunc_realhbbf16_to_floathbf16(
19+
std::expm1, ctx, in, out);
2320
}
2421

2522
} // namespace native

kernels/portable/cpu/op_floor.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@ namespace native {
1717
using executorch::aten::Tensor;
1818

1919
Tensor& floor_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
20-
static constexpr const char op_name[] = "floor.out";
21-
return internal::unary_ufunc_realhbf16<op_name>(
22-
[](auto x) { return executorch::math::floor(x); }, ctx, in, out);
20+
return internal::unary_ufunc_realhbf16(std::floor, ctx, in, out);
2321
}
2422

2523
} // namespace native

kernels/portable/cpu/op_fmod.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ Tensor& fmod_Tensor_out(
6161
utils::SupportedTensorDtypes::REALHBF16>(
6262
[&div_by_zero_error](
6363
const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
64-
// TODO: rewrite this to be vectorization-capable?
64+
// TODO: rewrite this to be vectorization-capable.
6565
CTYPE_COMPUTE value = 0;
6666
if (is_integral_type<CTYPE_COMPUTE, /*includeBool=*/true>::value) {
6767
if (val_b == 0) {
@@ -138,8 +138,10 @@ Tensor& fmod_Scalar_out(
138138
CTYPE_COMPUTE,
139139
op_name,
140140
utils::SupportedTensorDtypes::REALHBF16>(
141-
[val_b](const auto val_a) {
142-
return executorch::math::fmod(val_a, (decltype(val_a))val_b);
141+
[val_b](const CTYPE_COMPUTE val_a) {
142+
// TODO: rewrite this to be vectorization-capable.
143+
CTYPE_COMPUTE value = std::fmod(val_a, val_b);
144+
return value;
143145
},
144146
ctx,
145147
a,

kernels/portable/cpu/op_isinf.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,8 @@ namespace native {
1717
Tensor& isinf_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
1818
// Lambda is syntactic sugar needed to workaround compilation on some older
1919
// non-compatible distros where isnan is returning int rather than bool
20-
static constexpr const char op_name[] = "isinf.out";
21-
return internal::unary_ufunc_realhb_to_bool<op_name>(
22-
[](auto x) -> bool { return std::isinf(x); }, ctx, in, out);
20+
return internal::unary_ufunc_realhb_to_bool(
21+
[](double x) -> bool { return std::isinf(x); }, ctx, in, out);
2322
}
2423

2524
} // namespace native

kernels/portable/cpu/op_isnan.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,8 @@ namespace native {
1717
Tensor& isnan_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
1818
// Lambda is syntactic sugar needed to workaround compilation on some older
1919
// non-compatible distros where isnan is returning int rather than bool
20-
static constexpr const char op_name[] = "isnan.out";
21-
return internal::unary_ufunc_realhb_to_bool<op_name>(
22-
[](auto x) -> bool { return std::isnan(x); }, ctx, in, out);
20+
return internal::unary_ufunc_realhb_to_bool(
21+
[](double x) -> bool { return std::isnan(x); }, ctx, in, out);
2322
}
2423

2524
} // namespace native

kernels/portable/cpu/op_log.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@ namespace executor {
1515
namespace native {
1616

1717
Tensor& log_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
18-
static constexpr const char op_name[] = "log.out";
19-
return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>(
20-
[](auto x) { return executorch::math::log(x); }, ctx, in, out);
18+
return internal::unary_ufunc_realhbbf16_to_floathbf16(std::log, ctx, in, out);
2119
}
2220

2321
} // namespace native

kernels/portable/cpu/op_log10.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ namespace executor {
1515
namespace native {
1616

1717
Tensor& log10_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
18-
static constexpr const char op_name[] = "log10.out";
19-
return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>(
20-
[](auto x) { return executorch::math::log10(x); }, ctx, in, out);
18+
return internal::unary_ufunc_realhbbf16_to_floathbf16(
19+
std::log10, ctx, in, out);
2120
}
2221

2322
} // namespace native

kernels/portable/cpu/op_log1p.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ namespace executor {
1515
namespace native {
1616

1717
Tensor& log1p_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
18-
static constexpr const char op_name[] = "log1p.out";
19-
return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>(
20-
[](auto x) { return executorch::math::log1p(x); }, ctx, in, out);
18+
return internal::unary_ufunc_realhbbf16_to_floathbf16(
19+
std::log1p, ctx, in, out);
2120
}
2221

2322
} // namespace native

kernels/portable/cpu/op_log2.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ namespace executor {
1515
namespace native {
1616

1717
Tensor& log2_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
18-
static constexpr const char op_name[] = "log2.out";
19-
return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>(
20-
[](auto x) { return executorch::math::log2(x); }, ctx, in, out);
18+
return internal::unary_ufunc_realhbbf16_to_floathbf16(
19+
std::log2, ctx, in, out);
2120
}
2221

2322
} // namespace native

kernels/portable/cpu/op_maximum.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ Tensor& maximum_out(
4949
CTYPE_COMPUTE,
5050
op_name,
5151
utils::SupportedTensorDtypes::REALHBBF16>(
52-
[](const auto val_a, const auto val_b) {
52+
[](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
5353
return utils::max_override(val_a, val_b);
5454
},
5555
ctx,

kernels/portable/cpu/op_minimum.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ Tensor& minimum_out(
4949
CTYPE_COMPUTE,
5050
op_name,
5151
utils::SupportedTensorDtypes::REALHBBF16>(
52-
[](const auto val_a, const auto val_b) {
52+
[](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
53+
// TODO: rewrite this to be vectorization-capable.
5354
return utils::min_override(val_a, val_b);
5455
},
5556
ctx,

kernels/portable/cpu/op_mul.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@ Tensor& mul_out(
7272
CTYPE_COMPUTE,
7373
op_name,
7474
utils::SupportedTensorDtypes::REALHBBF16>(
75-
[](const auto val_a, const auto val_b) { return val_a * val_b; },
75+
[](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
76+
return val_a * val_b;
77+
},
7678
ctx,
7779
a,
7880
utils::SupportedTensorDtypes::REALHBBF16,

kernels/portable/cpu/op_native_dropout.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,8 @@ std::tuple<Tensor&, Tensor&> native_dropout_out(
5757
}
5858
ET_SWITCH_FLOATHBF16_TYPES(
5959
input.scalar_type(), ctx, op_name, CTYPE_COMPUTE, [&]() {
60-
utils::apply_bitensor_elementwise_fn<
61-
CTYPE_COMPUTE,
62-
op_name,
63-
utils::SupportedTensorDtypes::SAME_AS_COMMON>(
64-
[](const CTYPE_COMPUTE val, const CTYPE_COMPUTE mask_val) {
60+
utils::apply_bitensor_elementwise_fn<CTYPE_COMPUTE, op_name>(
61+
[](const auto val, const auto mask_val) {
6562
if (!mask_val) {
6663
return static_cast<decltype(val)>(0);
6764
}
@@ -73,7 +70,8 @@ std::tuple<Tensor&, Tensor&> native_dropout_out(
7370
mask,
7471
// TODO: should really be just BOOL
7572
utils::SupportedTensorDtypes::BOOL_OR_BYTE,
76-
out);
73+
out,
74+
utils::SupportedTensorDtypes::SAME_AS_COMMON);
7775
});
7876
} else if (input.numel() > 0) {
7977
std::memcpy(out.mutable_data_ptr(), input.data_ptr(), input.nbytes());

0 commit comments

Comments
 (0)