diff --git a/kernels/optimized/cpu/op_mul.cpp b/kernels/optimized/cpu/op_mul.cpp
index 3b2926a8a74..adcd8999150 100644
--- a/kernels/optimized/cpu/op_mul.cpp
+++ b/kernels/optimized/cpu/op_mul.cpp
@@ -41,6 +41,50 @@ bool can_use_optimized_path(
       (a.numel() == b.numel() && a.numel() == out.numel()));
   return can_use_optimized_path;
 }
+
+template <
+    bool can_cast,
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct MulInner;
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct MulInner<true, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT> {
+  static void run(const Tensor& a, const Tensor& b, Tensor& out) {
+    apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
+        // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue)
+        [](const CTYPE_A val_a, const CTYPE_B val_b) {
+          CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+          CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
+          CTYPE_IN value = a_casted * b_casted;
+
+          return static_cast<CTYPE_OUT>(value);
+        },
+        a,
+        b,
+        out);
+  }
+};
+
+struct ReportCanCastBug {
+  static void run(const Tensor&, const Tensor&, Tensor&) {
+    ET_DCHECK_MSG(false, "BUG: canCast should have been checked above");
+  }
+};
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct MulInner<false, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT>
+    : public ReportCanCastBug {};
 } // namespace
 
 Tensor& opt_mul_out(
@@ -86,20 +130,21 @@ Tensor& opt_mul_out(
 
     ET_SWITCH_REALHB_TYPES(a_type, ctx, "mul.out", CTYPE_A, [&]() {
       ET_SWITCH_REALHB_TYPES(b_type, ctx, "mul.out", CTYPE_B, [&]() {
-        ET_SWITCH_REALB_TYPES(common_type, ctx, "mul.out", CTYPE_IN, [&]() {
-          ET_SWITCH_REALHB_TYPES(out_type, ctx, "mul.out", CTYPE_OUT, [&]() {
-            apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
-                [](const CTYPE_A val_a, const CTYPE_B val_b) {
-                  CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
-                  CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
-                  CTYPE_IN value = a_casted * b_casted;
-
-                  return static_cast<CTYPE_OUT>(value);
-                },
-                a,
-                b,
-                out);
-          });
+        using CTYPE_IN = typename torch::executor::
+            promote_types<CTYPE_A, CTYPE_B, /*half_to_float*/ true>::type;
+        ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
+        ET_SWITCH_REALHB_TYPES(out_type, ctx, "mul.out", CTYPE_OUT, [&]() {
+          apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
+              [](const CTYPE_A val_a, const CTYPE_B val_b) {
+                CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+                CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
+                CTYPE_IN value = a_casted * b_casted;
+
+                return static_cast<CTYPE_OUT>(value);
+              },
+              a,
+              b,
+              out);
         });
       });
     });
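Note on the pattern introduced here (and repeated for every portable kernel below): the leading `bool can_cast` template parameter moves the runtime `canCast(common_type, out_type)` decision to compile time, so the arithmetic body is only ever instantiated for type pairs that can legally cast, while rejected pairs resolve to the `ReportCanCastBug` stub. A minimal self-contained sketch of that dispatch — plain C++, no ExecuTorch types, with `Inner` standing in for `MulInner`:

#include <cassert>
#include <cstdio>

template <bool can_cast, typename CTYPE_IN, typename CTYPE_OUT>
struct Inner;

// Chosen when the promoted type is castable to the output type:
// this is the only specialization containing real arithmetic.
template <typename CTYPE_IN, typename CTYPE_OUT>
struct Inner<true, CTYPE_IN, CTYPE_OUT> {
  static CTYPE_OUT run(CTYPE_IN a, CTYPE_IN b) {
    return static_cast<CTYPE_OUT>(a * b);
  }
};

// Chosen for pairs the runtime canCast() check already rejected;
// reaching it at runtime means the caller skipped that check.
template <typename CTYPE_IN, typename CTYPE_OUT>
struct Inner<false, CTYPE_IN, CTYPE_OUT> {
  static CTYPE_OUT run(CTYPE_IN, CTYPE_IN) {
    assert(false && "BUG: canCast should have been checked above");
    return CTYPE_OUT{};
  }
};

int main() {
  printf("%f\n", Inner<true, float, float>::run(2.0f, 3.0f)); // prints 6.0
}

The payoff is code size: the old nested ET_SWITCH over `common_type` stamped out a lambda body for every (A, B, IN, OUT) combination, whereas computing CTYPE_IN at compile time fixes it from (A, B).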
diff --git a/kernels/portable/cpu/op_bitwise_and.cpp b/kernels/portable/cpu/op_bitwise_and.cpp
index b1078f780a4..de137afbec2 100644
--- a/kernels/portable/cpu/op_bitwise_and.cpp
+++ b/kernels/portable/cpu/op_bitwise_and.cpp
@@ -6,8 +6,10 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <cmath>
-
+// patternlint-disable-next-line executorch-cpp-nostdinc
+#include <functional>
+
+#include <executorch/kernels/portable/cpu/pattern/bitwise_op.h>
 #include <executorch/kernels/portable/cpu/scalar_utils.h>
 #include <executorch/kernels/portable/cpu/util/broadcast_util.h>
 #include <executorch/kernels/portable/cpu/util/functional_util.h>
@@ -17,20 +19,6 @@ namespace torch {
 namespace executor {
 namespace native {
 
-namespace {
-
-template <typename CTYPE>
-CTYPE bitwise_and(CTYPE a, CTYPE b) {
-  return a & b;
-}
-
-template <>
-bool bitwise_and(bool a, bool b) {
-  return a && b;
-}
-
-} // namespace
-
 using Tensor = exec_aten::Tensor;
 
 Tensor& bitwise_and_Tensor_out(
@@ -55,38 +43,23 @@ Tensor& bitwise_and_Tensor_out(
       Bool, a_type, ctx, "bitwise_and.Tensor_out", CTYPE_A, [&]() {
         ET_SWITCH_INT_TYPES_AND(
             Bool, b_type, ctx, "bitwise_and.Tensor_out", CTYPE_B, [&]() {
-              ET_SWITCH_INT_TYPES_AND(
+              using CTYPE_IN = typename torch::executor::
+                  promote_types<CTYPE_A, CTYPE_B>::type;
+              ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
+              ET_SWITCH_REAL_TYPES_AND(
                   Bool,
-                  common_type,
+                  out_type,
                   ctx,
                   "bitwise_and.Tensor_out",
-                  CTYPE_IN,
+                  CTYPE_OUT,
                   [&]() {
-                    ET_SWITCH_REAL_TYPES_AND(
-                        Bool,
-                        out_type,
-                        ctx,
-                        "bitwise_and.Tensor_out",
-                        CTYPE_OUT,
-                        [&]() {
-                          apply_binary_elementwise_fn<
-                              CTYPE_A,
-                              CTYPE_B,
-                              CTYPE_OUT>(
-                              [](const CTYPE_A val_a, const CTYPE_B val_b) {
-                                CTYPE_IN a_casted =
-                                    static_cast<CTYPE_IN>(val_a);
-                                CTYPE_IN b_casted =
-                                    static_cast<CTYPE_IN>(val_b);
-                                CTYPE_IN value =
-                                    bitwise_and(a_casted, b_casted);
-
-                                return static_cast<CTYPE_OUT>(value);
-                              },
-                              a,
-                              b,
-                              out);
-                        });
+                    internal::BitwiseOpInner<
+                        can_cast<CTYPE_IN, CTYPE_OUT>::value,
+                        std::bit_and,
+                        CTYPE_A,
+                        CTYPE_B,
+                        CTYPE_IN,
+                        CTYPE_OUT>::run(a, b, out);
                   });
             });
       });
@@ -142,8 +115,8 @@ Tensor& bitwise_and_Scalar_out(
                       static_cast<CTYPE_IN>(val_a);
                   CTYPE_IN b_casted =
                       static_cast<CTYPE_IN>(val_b);
-                  CTYPE_IN value =
-                      bitwise_and(a_casted, b_casted);
+                  CTYPE_IN value = std::bit_and<CTYPE_IN>()(
+                      a_casted, b_casted);
 
                   return static_cast<CTYPE_OUT>(value);
                 },
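Note: the hand-written `bool` specialization deleted above is not needed once `std::bit_and` is used, since for two already-evaluated `bool` operands `a & b` equals `a && b` (short-circuiting only matters when evaluating the second operand has side effects, which it cannot here). The same identities hold for the or/xor replacements in the next two files. A compile-time spot check, runnable anywhere with C++14 or later:

// Verifies that the std:: functors agree with the deleted bool
// specializations: && for bit_and, || for bit_or, != for bit_xor.
#include <functional>

static_assert(std::bit_and<bool>()(true, true) == (true && true), "");
static_assert(std::bit_and<bool>()(true, false) == (true && false), "");
static_assert(std::bit_or<bool>()(false, false) == (false || false), "");
static_assert(std::bit_or<bool>()(true, false) == (true || false), "");
static_assert(std::bit_xor<bool>()(true, true) == (true != true), "");
static_assert(std::bit_xor<bool>()(true, false) == (true != false), "");

int main() {} // compiling is the test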
diff --git a/kernels/portable/cpu/op_bitwise_or.cpp b/kernels/portable/cpu/op_bitwise_or.cpp
index c13c68d3db4..39707de07ce 100644
--- a/kernels/portable/cpu/op_bitwise_or.cpp
+++ b/kernels/portable/cpu/op_bitwise_or.cpp
@@ -6,8 +6,10 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <cmath>
-
+// patternlint-disable-next-line executorch-cpp-nostdinc
+#include <functional>
+
+#include <executorch/kernels/portable/cpu/pattern/bitwise_op.h>
 #include <executorch/kernels/portable/cpu/scalar_utils.h>
 #include <executorch/kernels/portable/cpu/util/broadcast_util.h>
 #include <executorch/kernels/portable/cpu/util/functional_util.h>
@@ -17,20 +19,6 @@ namespace torch {
 namespace executor {
 namespace native {
 
-namespace {
-
-template <typename CTYPE>
-CTYPE bitwise_or(CTYPE a, CTYPE b) {
-  return a | b;
-}
-
-template <>
-bool bitwise_or(bool a, bool b) {
-  return a || b;
-}
-
-} // namespace
-
 using Tensor = exec_aten::Tensor;
 
 Tensor& bitwise_or_Tensor_out(
@@ -55,37 +43,23 @@ Tensor& bitwise_or_Tensor_out(
       Bool, a_type, ctx, "bitwise_or.Tensor_out", CTYPE_A, [&]() {
         ET_SWITCH_INT_TYPES_AND(
             Bool, b_type, ctx, "bitwise_or.Tensor_out", CTYPE_B, [&]() {
-              ET_SWITCH_INT_TYPES_AND(
+              using CTYPE_IN = typename torch::executor::
+                  promote_types<CTYPE_A, CTYPE_B>::type;
+              ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
+              ET_SWITCH_REAL_TYPES_AND(
                   Bool,
-                  common_type,
+                  out_type,
                   ctx,
                   "bitwise_or.Tensor_out",
-                  CTYPE_IN,
+                  CTYPE_OUT,
                   [&]() {
-                    ET_SWITCH_REAL_TYPES_AND(
-                        Bool,
-                        out_type,
-                        ctx,
-                        "bitwise_or.Tensor_out",
-                        CTYPE_OUT,
-                        [&]() {
-                          apply_binary_elementwise_fn<
-                              CTYPE_A,
-                              CTYPE_B,
-                              CTYPE_OUT>(
-                              [](const CTYPE_A val_a, const CTYPE_B val_b) {
-                                CTYPE_IN a_casted =
-                                    static_cast<CTYPE_IN>(val_a);
-                                CTYPE_IN b_casted =
-                                    static_cast<CTYPE_IN>(val_b);
-                                CTYPE_IN value = bitwise_or(a_casted, b_casted);
-
-                                return static_cast<CTYPE_OUT>(value);
-                              },
-                              a,
-                              b,
-                              out);
-                        });
+                    internal::BitwiseOpInner<
+                        can_cast<CTYPE_IN, CTYPE_OUT>::value,
+                        std::bit_or,
+                        CTYPE_A,
+                        CTYPE_B,
+                        CTYPE_IN,
+                        CTYPE_OUT>::run(a, b, out);
                   });
             });
       });
@@ -141,7 +115,8 @@ Tensor& bitwise_or_Scalar_out(
                       static_cast<CTYPE_IN>(val_a);
                   CTYPE_IN b_casted =
                       static_cast<CTYPE_IN>(val_b);
-                  CTYPE_IN value = bitwise_or(a_casted, b_casted);
+                  CTYPE_IN value =
+                      std::bit_or<CTYPE_IN>()(a_casted, b_casted);
 
                   return static_cast<CTYPE_OUT>(value);
                 },
diff --git a/kernels/portable/cpu/op_bitwise_xor.cpp b/kernels/portable/cpu/op_bitwise_xor.cpp
index d2ea8a81cfb..1855485ee52 100644
--- a/kernels/portable/cpu/op_bitwise_xor.cpp
+++ b/kernels/portable/cpu/op_bitwise_xor.cpp
@@ -6,8 +6,10 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <cmath>
-
+// patternlint-disable-next-line executorch-cpp-nostdinc
+#include <functional>
+
+#include <executorch/kernels/portable/cpu/pattern/bitwise_op.h>
 #include <executorch/kernels/portable/cpu/scalar_utils.h>
 #include <executorch/kernels/portable/cpu/util/broadcast_util.h>
 #include <executorch/kernels/portable/cpu/util/functional_util.h>
@@ -17,20 +19,6 @@ namespace torch {
 namespace executor {
 namespace native {
 
-namespace {
-
-template <typename CTYPE>
-CTYPE bitwise_xor(CTYPE a, CTYPE b) {
-  return a ^ b;
-}
-
-template <>
-bool bitwise_xor(bool a, bool b) {
-  return a != b;
-}
-
-} // namespace
-
 using Tensor = exec_aten::Tensor;
 
 Tensor& bitwise_xor_Tensor_out(
@@ -38,7 +26,6 @@ Tensor& bitwise_xor_Tensor_out(
     const Tensor& a,
     const Tensor& b,
     Tensor& out) {
-  // Determine output size and resize for dynamic shapes
   ET_KERNEL_CHECK(
       ctx,
       resize_to_broadcast_target_size(a, b, out) == Error::Ok,
@@ -56,38 +43,23 @@ Tensor& bitwise_xor_Tensor_out(
       Bool, a_type, ctx, "bitwise_xor.Tensor_out", CTYPE_A, [&]() {
         ET_SWITCH_INT_TYPES_AND(
             Bool, b_type, ctx, "bitwise_xor.Tensor_out", CTYPE_B, [&]() {
-              ET_SWITCH_INT_TYPES_AND(
+              using CTYPE_IN = typename torch::executor::
+                  promote_types<CTYPE_A, CTYPE_B>::type;
+              ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
+              ET_SWITCH_REAL_TYPES_AND(
                   Bool,
-                  common_type,
+                  out_type,
                   ctx,
                   "bitwise_xor.Tensor_out",
-                  CTYPE_IN,
+                  CTYPE_OUT,
                   [&]() {
-                    ET_SWITCH_REAL_TYPES_AND(
-                        Bool,
-                        out_type,
-                        ctx,
-                        "bitwise_xor.Tensor_out",
-                        CTYPE_OUT,
-                        [&]() {
-                          apply_binary_elementwise_fn<
-                              CTYPE_A,
-                              CTYPE_B,
-                              CTYPE_OUT>(
-                              [](const CTYPE_A val_a, const CTYPE_B val_b) {
-                                CTYPE_IN a_casted =
-                                    static_cast<CTYPE_IN>(val_a);
-                                CTYPE_IN b_casted =
-                                    static_cast<CTYPE_IN>(val_b);
-                                CTYPE_IN value =
-                                    bitwise_xor(a_casted, b_casted);
-
-                                return static_cast<CTYPE_OUT>(value);
-                              },
-                              a,
-                              b,
-                              out);
-                        });
+                    internal::BitwiseOpInner<
+                        can_cast<CTYPE_IN, CTYPE_OUT>::value,
+                        std::bit_xor,
+                        CTYPE_A,
+                        CTYPE_B,
+                        CTYPE_IN,
+                        CTYPE_OUT>::run(a, b, out);
                   });
             });
       });
@@ -143,8 +115,8 @@ Tensor& bitwise_xor_Scalar_out(
                      static_cast<CTYPE_IN>(val_a);
                  CTYPE_IN b_casted =
                      static_cast<CTYPE_IN>(val_b);
-                  CTYPE_IN value =
-                      bitwise_xor(a_casted, b_casted);
+                  CTYPE_IN value = std::bit_xor<CTYPE_IN>()(
+                      a_casted, b_casted);
 
                   return static_cast<CTYPE_OUT>(value);
                 },
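Note: the recurring `promote_types` + `ET_DCHECK` pair in these hunks replaces an entire runtime ET_SWITCH over `common_type` with a compile-time type computation; the DCHECK merely asserts that the compile-time answer matches what `promoteTypes` computed at runtime. A rough standalone sketch of the idea — `promote_types_sketch` is an assumption-laden stand-in that uses ordinary C++ arithmetic promotion, whereas the real `torch::executor::promote_types` follows ATen's rules (which differ in places, e.g. for bool and unsigned types):

#include <type_traits>

// Stand-in: the promoted type of A and B is whatever A + B yields.
template <typename A, typename B>
struct promote_types_sketch {
  using type = decltype(A{} + B{});
};

// The kernel picks CTYPE_IN at compile time instead of switching on a
// runtime dtype; these mirror common promotions the kernels rely on.
static_assert(
    std::is_same<promote_types_sketch<int, float>::type, float>::value,
    "int with float promotes to float");
static_assert(
    std::is_same<promote_types_sketch<short, int>::type, int>::value,
    "short with int promotes to int");

int main() {}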
diff --git a/kernels/portable/cpu/op_clamp.cpp b/kernels/portable/cpu/op_clamp.cpp
index 06c87d03f2d..50d7e8c374d 100644
--- a/kernels/portable/cpu/op_clamp.cpp
+++ b/kernels/portable/cpu/op_clamp.cpp
@@ -53,7 +53,7 @@ __ET_NODISCARD bool check_bounds(
       }
     });
   } else if (isFloatingType(out_type)) {
-    ET_SWITCH_FLOAT_TYPES(out_type, ctx, "clamp", CTYPE_OUT, [&]() {
+    ET_SWITCH_FLOATH_TYPES(out_type, ctx, "clamp", CTYPE_OUT, [&]() {
      if (std::isfinite(val) &&
          is_out_of_bounds<CTYPE_VAL, CTYPE_OUT>(val)) {
        ET_LOG(Error, "%s value out of bounds", val_name);
@@ -119,7 +119,7 @@ Tensor& clamp_out(
 
   ET_KERNEL_CHECK(ctx, common_type == out_type, InvalidArgument, out);
 
-  ET_SWITCH_REAL_TYPES(out_type, ctx, "clamp", CTYPE_OUT, [&]() {
+  ET_SWITCH_REALH_TYPES(out_type, ctx, "clamp", CTYPE_OUT, [&]() {
     // Extract optional min value
     CTYPE_OUT min = 0;
     if (has_min) {
@@ -140,7 +140,7 @@ Tensor& clamp_out(
       });
     }
 
-    ET_SWITCH_REAL_TYPES_AND(Bool, in_type, ctx, "clamp", CTYPE_IN, [&]() {
+    ET_SWITCH_REALHB_TYPES(in_type, ctx, "clamp", CTYPE_IN, [&]() {
       apply_unary_map_fn(
           [has_min, min, has_max, max](const CTYPE_IN val_in) {
             CTYPE_OUT val_out = static_cast<CTYPE_OUT>(val_in);
@@ -195,20 +195,20 @@ Tensor& clamp_tensor_out(
   ScalarType out_type = out.scalar_type();
 
   if (has_min) {
-    common_type = promoteTypes(common_type, min_type);
+    common_type = promoteTypes(common_type, min_type, /*half_to_float*/ true);
   }
   if (has_max) {
-    common_type = promoteTypes(common_type, max_type);
+    common_type = promoteTypes(common_type, max_type, /*half_to_float*/ true);
   }
 
   ET_KERNEL_CHECK(ctx, canCast(common_type, out_type), InvalidArgument, out);
 
   constexpr auto name = "clamp.Tensor_out";
 
-  ET_SWITCH_REALB_TYPES(in_type, ctx, name, CTYPE_IN, [&]() {
-    ET_SWITCH_REALB_TYPES(min_type, ctx, name, CTYPE_MIN, [&]() {
-      ET_SWITCH_REALB_TYPES(max_type, ctx, name, CTYPE_MAX, [&]() {
-        ET_SWITCH_REALB_TYPES(out_type, ctx, name, CTYPE_OUT, [&]() {
+  ET_SWITCH_REALHB_TYPES(in_type, ctx, name, CTYPE_IN, [&]() {
+    ET_SWITCH_REALHB_TYPES(min_type, ctx, name, CTYPE_MIN, [&]() {
+      ET_SWITCH_REALHB_TYPES(max_type, ctx, name, CTYPE_MAX, [&]() {
+        ET_SWITCH_REALHB_TYPES(out_type, ctx, name, CTYPE_OUT, [&]() {
           apply_ternary_elementwise_fn<
               CTYPE_IN,
               CTYPE_MIN,
diff --git a/kernels/portable/cpu/op_floor_divide.cpp b/kernels/portable/cpu/op_floor_divide.cpp
index 261f77ce617..0514df0ca25 100644
--- a/kernels/portable/cpu/op_floor_divide.cpp
+++ b/kernels/portable/cpu/op_floor_divide.cpp
@@ -20,6 +20,60 @@ namespace native {
 
 using Tensor = exec_aten::Tensor;
 using ScalarType = exec_aten::ScalarType;
 
+namespace {
+template <
+    bool can_cast,
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct FloorDivideInner;
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct FloorDivideInner<true, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT> {
+  static void
+  run(const Tensor& a, const Tensor& b, Tensor& out, bool& div_by_zero_error) {
+    apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
+        // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue)
+        [&div_by_zero_error](const CTYPE_A val_a, const CTYPE_B val_b) {
+          if (is_integral_type<CTYPE_IN, /*includeBool=*/true>::value) {
+            if (val_b == 0) {
+              div_by_zero_error = true;
+              return static_cast<CTYPE_OUT>(0);
+            }
+          }
+          CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+          CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
+          CTYPE_IN value = utils::floor_divide(a_casted, b_casted);
+
+          return static_cast<CTYPE_OUT>(value);
+        },
+        a,
+        b,
+        out);
+  }
+};
+
+struct ReportCanCastBug {
+  static void run(const Tensor&, const Tensor&, Tensor&, bool&) {
+    ET_DCHECK_MSG(false, "BUG: canCast should have been checked above");
+  }
+};
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct FloorDivideInner<false, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT>
+    : public ReportCanCastBug {};
+
+} // namespace
+
 Tensor& floor_divide_out(
     RuntimeContext& ctx,
     const Tensor& a,
@@ -46,36 +100,17 @@ Tensor& floor_divide_out(
       Bool, a_type, ctx, "floor_divide.out", CTYPE_A, [&]() {
         ET_SWITCH_REAL_TYPES_AND(
             Bool, b_type, ctx, "floor_divide.out", CTYPE_B, [&]() {
+              using CTYPE_IN = typename torch::executor::
+                  promote_types<CTYPE_A, CTYPE_B>::type;
+              ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
               ET_SWITCH_REAL_TYPES(
-                  common_type, ctx, "floor_divide.out", CTYPE_IN, [&]() {
-                    ET_SWITCH_REAL_TYPES(
-                        out_type, ctx, "floor_divide.out", CTYPE_OUT, [&]() {
-                          apply_binary_elementwise_fn<
-                              CTYPE_A,
-                              CTYPE_B,
-                              CTYPE_OUT>(
-                              [common_type, &div_by_zero_error](
-                                  const CTYPE_A val_a, const CTYPE_B val_b) {
-                                if (isIntegralType(
-                                        common_type, /*includeBool=*/true)) {
-                                  if (val_b == 0) {
-                                    div_by_zero_error = true;
-                                    return static_cast<CTYPE_OUT>(0);
-                                  }
-                                }
-                                CTYPE_IN a_casted =
-                                    static_cast<CTYPE_IN>(val_a);
-                                CTYPE_IN b_casted =
-                                    static_cast<CTYPE_IN>(val_b);
-                                CTYPE_IN value = utils::floor_divide(
-                                    a_casted, b_casted);
-
-                                return static_cast<CTYPE_OUT>(value);
-                              },
-                              a,
-                              b,
-                              out);
-                        });
+                  out_type, ctx, "floor_divide.out", CTYPE_OUT, [&]() {
+                    FloorDivideInner<
+                        can_cast<CTYPE_IN, CTYPE_OUT>::value,
+                        CTYPE_A,
+                        CTYPE_B,
+                        CTYPE_IN,
+                        CTYPE_OUT>::run(a, b, out, div_by_zero_error);
                   });
             });
       });
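Note: `FloorDivideInner` threads `div_by_zero_error` through by reference because the elementwise loop cannot abort mid-tensor; the lambda records the error, emits a placeholder 0, and the kernel reports once after the loop. Since `is_integral_type<CTYPE_IN, /*includeBool=*/true>::value` is a compile-time constant, floating-point instantiations carry no zero check at all. A runnable sketch of the same flag-and-continue shape, with `std::floor` over `double` as a simplification of `utils::floor_divide` (which implements ATen's exact semantics):

#include <cassert>
#include <cmath>
#include <cstddef>
#include <type_traits>
#include <vector>

template <typename T>
std::vector<T> floor_divide_all(
    const std::vector<T>& a, const std::vector<T>& b, bool& div_by_zero) {
  std::vector<T> out(a.size());
  for (std::size_t i = 0; i < a.size(); ++i) {
    // Compile-time branch: only integral T pays for the zero check.
    if (std::is_integral<T>::value && b[i] == T(0)) {
      div_by_zero = true; // remember the error, keep the loop going
      out[i] = T(0);
      continue;
    }
    out[i] = static_cast<T>(
        std::floor(static_cast<double>(a[i]) / static_cast<double>(b[i])));
  }
  return out;
}

int main() {
  bool err = false;
  auto r = floor_divide_all<int>({7, 5}, {2, 0}, err);
  assert(r[0] == 3 && err); // 7 / 2 floors to 3; 5 / 0 raised the flag
}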
diff --git a/kernels/portable/cpu/op_fmod.cpp b/kernels/portable/cpu/op_fmod.cpp
index 0083c1379d5..42f83731199 100644
--- a/kernels/portable/cpu/op_fmod.cpp
+++ b/kernels/portable/cpu/op_fmod.cpp
@@ -19,6 +19,60 @@ namespace native {
 
 using Tensor = exec_aten::Tensor;
 
+namespace {
+template <
+    bool can_cast,
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct FmodInner;
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct FmodInner<true, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT> {
+  static void
+  run(const Tensor& a, const Tensor& b, Tensor& out, bool& div_by_zero_error) {
+    apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
+        // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue)
+        [&div_by_zero_error](const CTYPE_A val_a, const CTYPE_B val_b) {
+          if (is_integral_type<CTYPE_IN, /*includeBool=*/true>::value) {
+            if (val_b == 0) {
+              div_by_zero_error = true;
+              return static_cast<CTYPE_OUT>(0);
+            }
+          }
+          CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+          CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
+          CTYPE_IN value = std::fmod(a_casted, b_casted);
+
+          return static_cast<CTYPE_OUT>(value);
+        },
+        a,
+        b,
+        out);
+  }
+};
+
+struct ReportCanCastBug {
+  static void run(const Tensor&, const Tensor&, Tensor&, bool&) {
+    ET_DCHECK_MSG(false, "BUG: canCast should have been checked above");
+  }
+};
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct FmodInner<false, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT>
+    : public ReportCanCastBug {};
+
+} // namespace
+
 Tensor& fmod_Tensor_out(
     RuntimeContext& ctx,
     const Tensor& a,
@@ -44,35 +98,18 @@ Tensor& fmod_Tensor_out(
       Bool, a_type, ctx, "fmod.Tensor_out", CTYPE_A, [&]() {
         ET_SWITCH_REAL_TYPES_AND(
             Bool, b_type, ctx, "fmod.Tensor_out", CTYPE_B, [&]() {
+              using CTYPE_IN = typename torch::executor::
+                  promote_types<CTYPE_A, CTYPE_B>::type;
+              ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
               ET_SWITCH_REAL_TYPES(
-                  common_type, ctx, "fmod.Tensor_out", CTYPE_IN, [&]() {
-                    ET_SWITCH_REAL_TYPES(
-                        out_type, ctx, "fmod.Tensor_out", CTYPE_OUT, [&]() {
-                          apply_binary_elementwise_fn<
-                              CTYPE_A,
-                              CTYPE_B,
-                              CTYPE_OUT>(
-                              [common_type, &div_by_zero_error](
-                                  const CTYPE_A val_a, const CTYPE_B val_b) {
-                                if (isIntegralType(
-                                        common_type, /*includeBool=*/true)) {
-                                  if (val_b == 0) {
-                                    div_by_zero_error = true;
-                                    return static_cast<CTYPE_OUT>(0);
-                                  }
-                                }
-                                CTYPE_IN a_casted =
-                                    static_cast<CTYPE_IN>(val_a);
-                                CTYPE_IN b_casted =
-                                    static_cast<CTYPE_IN>(val_b);
-                                CTYPE_IN value = std::fmod(a_casted, b_casted);
-
-                                return static_cast<CTYPE_OUT>(value);
-                              },
-                              a,
-                              b,
-                              out);
-                        });
+                  out_type, ctx, "fmod.Tensor_out", CTYPE_OUT, [&]() {
+                    FmodInner<
+                        !std::is_same<CTYPE_IN, exec_aten::Half>::value &&
+                            can_cast<CTYPE_IN, CTYPE_OUT>::value,
+                        CTYPE_A,
+                        CTYPE_B,
+                        CTYPE_IN,
+                        CTYPE_OUT>::run(a, b, out, div_by_zero_error);
                   });
             });
       });
diff --git a/kernels/portable/cpu/op_maximum.cpp b/kernels/portable/cpu/op_maximum.cpp
index 3e34035d5f6..1353479b294 100644
--- a/kernels/portable/cpu/op_maximum.cpp
+++ b/kernels/portable/cpu/op_maximum.cpp
@@ -8,6 +8,7 @@
 
 #include <executorch/kernels/portable/cpu/scalar_utils.h>
 #include <executorch/kernels/portable/cpu/util/broadcast_util.h>
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
 namespace torch {
@@ -15,10 +16,49 @@ namespace executor {
 namespace native {
 namespace {
 
-template <typename T>
-const T& max(const T& a, const T& b) {
-  return (b > a) ? b : a;
-}
+template <
+    bool can_cast,
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct MaximumInner;
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct MaximumInner<true, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT> {
+  static void run(const Tensor& a, const Tensor& b, Tensor& out) {
+    apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
+        // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue)
+        [](const CTYPE_A val_a, const CTYPE_B val_b) {
+          CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+          CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
+          CTYPE_IN value = utils::max_override(a_casted, b_casted);
+
+          return static_cast<CTYPE_OUT>(value);
+        },
+        a,
+        b,
+        out);
+  }
+};
+
+struct ReportCanCastBug {
+  static void run(const Tensor&, const Tensor&, Tensor&) {
+    ET_DCHECK_MSG(false, "BUG: canCast should have been checked above");
+  }
+};
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct MaximumInner<false, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT>
+    : public ReportCanCastBug {};
 
 } // namespace
 
@@ -44,20 +84,16 @@ Tensor& maximum_out(
 
   ET_SWITCH_REALHB_TYPES(a_type, ctx, "maximum.out", CTYPE_A, [&]() {
     ET_SWITCH_REALHB_TYPES(b_type, ctx, "maximum.out", CTYPE_B, [&]() {
-      ET_SWITCH_REALB_TYPES(common_type, ctx, "maximum.out", CTYPE_IN, [&]() {
-        ET_SWITCH_REALHB_TYPES(out_type, ctx, "maximum.out", CTYPE_OUT, [&]() {
-          apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
-              [](const CTYPE_A val_a, const CTYPE_B val_b) {
-                CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
-                CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
-                CTYPE_IN value = max(a_casted, b_casted);
-
-                return static_cast<CTYPE_OUT>(value);
-              },
-              a,
-              b,
-              out);
-        });
+      using CTYPE_IN = typename torch::executor::
+          promote_types<CTYPE_A, CTYPE_B, /*half_to_float*/ true>::type;
+      ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
+      ET_SWITCH_REALHB_TYPES(out_type, ctx, "maximum.out", CTYPE_OUT, [&]() {
+        MaximumInner<
+            can_cast<CTYPE_IN, CTYPE_OUT>::value,
+            CTYPE_A,
+            CTYPE_B,
+            CTYPE_IN,
+            CTYPE_OUT>::run(a, b, out);
       });
     });
   });
diff --git a/kernels/portable/cpu/op_minimum.cpp b/kernels/portable/cpu/op_minimum.cpp
index 767a2c4ca59..f18d1a6d368 100644
--- a/kernels/portable/cpu/op_minimum.cpp
+++ b/kernels/portable/cpu/op_minimum.cpp
@@ -8,6 +8,7 @@
 
 #include <executorch/kernels/portable/cpu/scalar_utils.h>
 #include <executorch/kernels/portable/cpu/util/broadcast_util.h>
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
 namespace torch {
@@ -15,10 +16,49 @@ namespace executor {
 namespace native {
 namespace {
 
-template <typename T>
-const T& min(const T& a, const T& b) {
-  return (b < a) ? b : a;
-}
+template <
+    bool can_cast,
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct MinimumInner;
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct MinimumInner<true, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT> {
+  static void run(const Tensor& a, const Tensor& b, Tensor& out) {
+    apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
+        // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue)
+        [](const CTYPE_A val_a, const CTYPE_B val_b) {
+          CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+          CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
+          CTYPE_IN value = utils::min_override(a_casted, b_casted);
+
+          return static_cast<CTYPE_OUT>(value);
+        },
+        a,
+        b,
+        out);
+  }
+};
+
+struct ReportCanCastBug {
+  static void run(const Tensor&, const Tensor&, Tensor&) {
+    ET_DCHECK_MSG(false, "BUG: canCast should have been checked above");
+  }
+};
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct MinimumInner<false, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT>
+    : public ReportCanCastBug {};
 
 } // namespace
 
@@ -37,30 +77,24 @@ Tensor& minimum_out(
 
   ScalarType a_type = a.scalar_type();
   ScalarType b_type = b.scalar_type();
-  ScalarType common_type = promoteTypes(a_type, b_type);
+  ScalarType common_type = promoteTypes(a_type, b_type, /*half_to_float*/ true);
   ScalarType out_type = out.scalar_type();
 
   ET_KERNEL_CHECK(ctx, canCast(common_type, out_type), InvalidArgument, out);
 
-  ET_SWITCH_REAL_TYPES_AND(Bool, a_type, ctx, "minimum.out", CTYPE_A, [&]() {
-    ET_SWITCH_REAL_TYPES_AND(Bool, b_type, ctx, "minimum.out", CTYPE_B, [&]() {
-      ET_SWITCH_REAL_TYPES_AND(
-          Bool, common_type, ctx, "minimum.out", CTYPE_IN, [&]() {
-            ET_SWITCH_REAL_TYPES_AND(
-                Bool, out_type, ctx, "minimum.out", CTYPE_OUT, [&]() {
-                  apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
-                      [](const CTYPE_A val_a, const CTYPE_B val_b) {
-                        CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
-                        CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
-                        CTYPE_IN value = min(a_casted, b_casted);
-
-                        return static_cast<CTYPE_OUT>(value);
-                      },
-                      a,
-                      b,
-                      out);
-                });
-          });
+  ET_SWITCH_REALHB_TYPES(a_type, ctx, "minimum.out", CTYPE_A, [&]() {
+    ET_SWITCH_REALHB_TYPES(b_type, ctx, "minimum.out", CTYPE_B, [&]() {
+      using CTYPE_IN = typename torch::executor::
+          promote_types<CTYPE_A, CTYPE_B, /*half_to_float*/ true>::type;
+      ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
+      ET_SWITCH_REALHB_TYPES(out_type, ctx, "minimum.out", CTYPE_OUT, [&]() {
+        MinimumInner<
+            can_cast<CTYPE_IN, CTYPE_OUT>::value,
+            CTYPE_A,
+            CTYPE_B,
+            CTYPE_IN,
+            CTYPE_OUT>::run(a, b, out);
+      });
     });
   });
been checked above"); + } +}; + +template < + typename CTYPE_A, + typename CTYPE_B, + typename CTYPE_IN, + typename CTYPE_OUT> +struct RemainderInner + : public ReportCanCastBug {}; + +} // namespace Tensor& remainder_Tensor_out( RuntimeContext& ctx, const Tensor& a, @@ -45,32 +91,17 @@ Tensor& remainder_Tensor_out( Bool, a_type, ctx, "remainder.Tensor_out", CTYPE_A, [&]() { ET_SWITCH_REAL_TYPES_AND( Bool, b_type, ctx, "remainder.Tensor_out", CTYPE_B, [&]() { + using CTYPE_IN = typename torch::executor:: + promote_types::type; + ET_DCHECK(CppTypeToScalarType::value == common_type); ET_SWITCH_REAL_TYPES( - common_type, ctx, "remainder.Tensor_out", CTYPE_IN, [&]() { - ET_SWITCH_REAL_TYPES( - out_type, - ctx, - "remainder.Tensor_out", - CTYPE_OUT, - [&]() { - apply_binary_elementwise_fn< - CTYPE_A, - CTYPE_B, - CTYPE_OUT>( - [](const CTYPE_A val_a, const CTYPE_B val_b) { - CTYPE_IN a_casted = - static_cast(val_a); - CTYPE_IN b_casted = - static_cast(val_b); - CTYPE_IN value = utils::remainder_override( - a_casted, b_casted); - - return static_cast(value); - }, - a, - b, - out); - }); + out_type, ctx, "remainder.Tensor_out", CTYPE_OUT, [&]() { + RemainderInner< + can_cast::value, + CTYPE_A, + CTYPE_B, + CTYPE_IN, + CTYPE_OUT>::run(a, b, out); }); }); }); diff --git a/kernels/portable/cpu/pattern/bitwise_op.h b/kernels/portable/cpu/pattern/bitwise_op.h new file mode 100644 index 00000000000..dda4fe5cd55 --- /dev/null +++ b/kernels/portable/cpu/pattern/bitwise_op.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace torch { +namespace executor { +namespace native { +namespace internal { + +template < + bool can_cast, + template + class OpFunc, + typename CTYPE_A, + typename CTYPE_B, + typename CTYPE_IN, + typename CTYPE_OUT> +struct BitwiseOpInner; + +template < + template + class OpFunc, + typename CTYPE_A, + typename CTYPE_B, + typename CTYPE_IN, + typename CTYPE_OUT> +struct BitwiseOpInner { + static void run(const Tensor& a, const Tensor& b, Tensor& out) { + apply_binary_elementwise_fn( + // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue) + [](const CTYPE_A val_a, const CTYPE_B val_b) { + CTYPE_IN a_casted = static_cast(val_a); + CTYPE_IN b_casted = static_cast(val_b); + CTYPE_IN value = OpFunc()(a_casted, b_casted); + + return static_cast(value); + }, + a, + b, + out); + } +}; + +struct ReportCanCastBug { + static void run(const Tensor&, const Tensor&, Tensor&) { + ET_DCHECK_MSG(false, "BUG: canCast should have been checked above"); + } +}; + +template < + template + class OpFunc, + typename CTYPE_A, + typename CTYPE_B, + typename CTYPE_IN, + typename CTYPE_OUT> +struct BitwiseOpInner + : public ReportCanCastBug {}; + +} // namespace internal +} // namespace native +} // namespace executor +} // namespace torch diff --git a/kernels/portable/cpu/pattern/targets.bzl b/kernels/portable/cpu/pattern/targets.bzl index 360d991767b..7e0b71ed950 100644 --- a/kernels/portable/cpu/pattern/targets.bzl +++ b/kernels/portable/cpu/pattern/targets.bzl @@ -6,6 +6,17 @@ def define_common_targets(): The directory containing this targets.bzl file should also contain both TARGETS and BUCK files that call this function. 
""" + runtime.cxx_library( + name = "bitwise_op", + exported_headers = [ + "bitwise_op.h", + ], + compiler_flags = [], + deps = [ + "//executorch/runtime/kernel:kernel_includes", + ], + visibility = ["//executorch/kernels/portable/cpu/...", "//executorch/kernels/optimized/cpu/..."], + ) runtime.cxx_library( name = "pattern", diff --git a/kernels/portable/cpu/scalar_utils.h b/kernels/portable/cpu/scalar_utils.h index 989e7978fc3..3daf3e72526 100644 --- a/kernels/portable/cpu/scalar_utils.h +++ b/kernels/portable/cpu/scalar_utils.h @@ -84,9 +84,9 @@ template struct promote_type_with_scalar_type { private: static_assert( - std::is_same::value || - std::is_same::value || - std::is_same::value, + std::is_same::value || + std::is_same::value || + std::is_same::value, "scalar type can only be Bool, Long or Double"); static_assert( !is_qint_type::value, @@ -102,17 +102,19 @@ struct promote_type_with_scalar_type { "promote_type_with_scalar_type not valid for BFloat16"); using promote_type_with_scalar_type_not_respecting_half_to_float = typename std::conditional< - is_complex_type::value || std::is_same::value, + is_complex_type::value || + std::is_same::value, T1, typename std::conditional< - std::is_same::value, + std::is_same::value, typename std::conditional< - std::is_same::value, - internal::I8, + std::is_same::value, + torch::executor::internal::I8, T1>::type, - typename std:: - conditional::value, T1, internal::F4>:: - type>::type>::type; + typename std::conditional< + is_floating_point::value, + T1, + torch::executor::internal::F4>::type>::type>::type; public: using type = typename std::conditional< diff --git a/kernels/portable/cpu/targets.bzl b/kernels/portable/cpu/targets.bzl index 77796c68526..7be1d94d2bf 100644 --- a/kernels/portable/cpu/targets.bzl +++ b/kernels/portable/cpu/targets.bzl @@ -142,6 +142,7 @@ _ATEN_OPS = ( deps = [ "//executorch/runtime/core/exec_aten/util:scalar_type_util", "//executorch/runtime/core/exec_aten/util:tensor_util", + "//executorch/kernels/portable/cpu/pattern:bitwise_op", "//executorch/kernels/portable/cpu/util:broadcast_util", "//executorch/kernels/portable/cpu/util:functional_util", ":scalar_utils", @@ -160,6 +161,7 @@ _ATEN_OPS = ( deps = [ "//executorch/runtime/core/exec_aten/util:scalar_type_util", "//executorch/runtime/core/exec_aten/util:tensor_util", + "//executorch/kernels/portable/cpu/pattern:bitwise_op", "//executorch/kernels/portable/cpu/util:broadcast_util", "//executorch/kernels/portable/cpu/util:functional_util", ":scalar_utils", @@ -170,6 +172,7 @@ _ATEN_OPS = ( deps = [ "//executorch/runtime/core/exec_aten/util:scalar_type_util", "//executorch/runtime/core/exec_aten/util:tensor_util", + "//executorch/kernels/portable/cpu/pattern:bitwise_op", "//executorch/kernels/portable/cpu/util:broadcast_util", "//executorch/kernels/portable/cpu/util:functional_util", ":scalar_utils", @@ -560,6 +563,7 @@ _ATEN_OPS = ( name = "op_maximum", deps = [ "//executorch/kernels/portable/cpu/util:broadcast_util", + "//executorch/kernels/portable/cpu/util:math_util", ":scalar_utils", ], ), @@ -591,6 +595,7 @@ _ATEN_OPS = ( name = "op_minimum", deps = [ "//executorch/kernels/portable/cpu/util:broadcast_util", + "//executorch/kernels/portable/cpu/util:math_util", ":scalar_utils", ], ), diff --git a/kernels/portable/cpu/util/math_util.h b/kernels/portable/cpu/util/math_util.h index 44cb47f8cba..df175147062 100644 --- a/kernels/portable/cpu/util/math_util.h +++ b/kernels/portable/cpu/util/math_util.h @@ -94,6 +94,48 @@ INT_T max_override(INT_T a, INT_T b) { return 
diff --git a/kernels/test/op_clamp_test.cpp b/kernels/test/op_clamp_test.cpp
index 871333482c8..0244fd55700 100644
--- a/kernels/test/op_clamp_test.cpp
+++ b/kernels/test/op_clamp_test.cpp
@@ -147,8 +147,16 @@ class OpClampOutTest : public OperatorTest {
   // Test cases that are compatible with float and double.
   template <ScalarType DTYPE>
   void run_floating_point_test_cases() {
-    constexpr auto kInfinity =
-        std::numeric_limits<typename TensorFactory<DTYPE>::ctype>::infinity();
+    using ctype = typename TensorFactory<DTYPE>::ctype;
+    using opt_infinity_type = std::conditional_t<
+        std::is_same<ctype, exec_aten::Half>::value,
+        float,
+        ctype>;
+    constexpr auto kInfinity = std::numeric_limits<ctype>::infinity();
+    const auto kOptInfinity =
+        OptScalar(static_cast<opt_infinity_type>(kInfinity));
+    const auto kOptMinusInfinity =
+        OptScalar(static_cast<opt_infinity_type>(-kInfinity));
     std::vector<ClampTestCase<DTYPE>> test_cases = {
         {
             std::string(__func__) + ": Simple negative/positive clamp",
@@ -178,7 +186,7 @@ class OpClampOutTest : public OperatorTest {
           std::string(__func__) + ": Infinite min",
           {2, 2}, // sizes
           {-10.1, -1.1, 1.1, 10.1}, // input_data
-          OptScalar(-kInfinity), // min
+          kOptMinusInfinity, // min
           OptScalar(5.5), // max
           {-10.1, -1.1, 1.1, 5.5}, // expected_data
       },
@@ -187,7 +195,7 @@ class OpClampOutTest : public OperatorTest {
           std::string(__func__) + ": Infinite max",
           {2, 2}, // sizes
           {-10.1, -1.1, 1.1, 10.1}, // input_data
           OptScalar(-5.5), // min
-          OptScalar(kInfinity), // max
+          kOptInfinity, // max
           {-5.5, -1.1, 1.1, 10.1}, // expected_data
       },
@@ -285,6 +293,15 @@ TEST_F(OpClampOutTest, LongTensors) {
   run_signed_integer_test_cases<ScalarType::Long>();
 }
 
+TEST_F(OpClampOutTest, HalfTensors) {
+  // Note that the integer test cases test the situation where the min/max value
+  // Scalars are integer types, demonstrating that floating point types can be
+  // clamped to integer values.
+  run_unsigned_integer_test_cases<ScalarType::Half>();
+  run_signed_integer_test_cases<ScalarType::Half>();
+  run_floating_point_test_cases<ScalarType::Half>();
+}
+
 TEST_F(OpClampOutTest, FloatTensors) {
   // Note that the integer test cases test the situation where the min/max value
   // Scalars are integer types, demonstrating that floating point types can be
diff --git a/kernels/test/op_fmod_test.cpp b/kernels/test/op_fmod_test.cpp
index 475d4ea5cb4..4ee4d84c1cc 100644
--- a/kernels/test/op_fmod_test.cpp
+++ b/kernels/test/op_fmod_test.cpp
@@ -32,3 +32,16 @@ class OpFmodTest : public OperatorTest {
     return torch::executor::aten::fmod_outf(context_, self, other, out);
   }
 };
+
+TEST_F(OpFmodTest, SmokeTest) {
+  TensorFactory<ScalarType::Double> tfDouble;
+  TensorFactory<ScalarType::Long> tfLong;
+  TensorFactory<ScalarType::Int> tfInt;
+
+  Tensor self = tfLong.full({2, 2}, 46);
+  Tensor other = tfInt.full({2, 2}, 4);
+  Tensor out = tfDouble.zeros({2, 2});
+  Tensor out_expected = tfDouble.full({2, 2}, 2.0);
+  op_fmod_tensor_out(self, other, out);
+  EXPECT_TENSOR_CLOSE(out, out_expected);
+}
diff --git a/kernels/test/op_minimum_test.cpp b/kernels/test/op_minimum_test.cpp
index be43e0af07d..7e12374b8d1 100644
--- a/kernels/test/op_minimum_test.cpp
+++ b/kernels/test/op_minimum_test.cpp
@@ -65,6 +65,10 @@ TEST_F(OpMinimumOutTest, LongTensors) {
   test_minimum_out_same_size<ScalarType::Long>();
 }
 
+TEST_F(OpMinimumOutTest, HalfTensors) {
+  test_minimum_out_same_size<ScalarType::Half>();
+}
+
 TEST_F(OpMinimumOutTest, FloatTensors) {
   test_minimum_out_same_size<ScalarType::Float>();
 }
diff --git a/kernels/test/op_remainder_test.cpp b/kernels/test/op_remainder_test.cpp
index 4a550958a1a..254e8122b61 100644
--- a/kernels/test/op_remainder_test.cpp
+++ b/kernels/test/op_remainder_test.cpp
@@ -21,6 +21,7 @@ using exec_aten::Tensor;
 using torch::executor::testing::TensorFactory;
 
 class OpRemainderOutTest : public OperatorTest {
+ protected:
   Tensor& op_remainder_tensor_out(
       const Tensor& self,
       const Tensor& other,
@@ -35,3 +36,16 @@ class OpRemainderOutTest : public OperatorTest {
     return torch::executor::aten::remainder_outf(context_, self, other, out);
   }
 };
+
+TEST_F(OpRemainderOutTest, SmokeTest) {
+  TensorFactory<ScalarType::Double> tfDouble;
+  TensorFactory<ScalarType::Long> tfLong;
+  TensorFactory<ScalarType::Int> tfInt;
+
+  Tensor self = tfLong.full({2, 2}, 46);
+  Tensor other = tfInt.full({2, 2}, 4);
+  Tensor out = tfDouble.zeros({2, 2});
+  Tensor out_expected = tfDouble.full({2, 2}, 2.0);
+  op_remainder_tensor_out(self, other, out);
+  EXPECT_TENSOR_CLOSE(out, out_expected);
+}
diff --git a/runtime/core/exec_aten/util/scalar_type_util.h b/runtime/core/exec_aten/util/scalar_type_util.h
index 595ed7a1c02..084289520aa 100644
--- a/runtime/core/exec_aten/util/scalar_type_util.h
+++ b/runtime/core/exec_aten/util/scalar_type_util.h
@@ -349,6 +349,12 @@ inline constexpr bool isIntegralType(
       t == exec_aten::ScalarType::Short);
 }
 
+template <typename T, bool includeBool>
+struct is_integral_type
+    : public std::integral_constant<
+          bool,
+          isIntegralType(CppTypeToScalarType<T>::value, includeBool)> {};
+
 inline constexpr bool isFloatingType(exec_aten::ScalarType t) {
   return (
       t == exec_aten::ScalarType::Double || t == exec_aten::ScalarType::Float ||
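Note: the `is_integral_type` trait added at the end simply lifts the existing runtime `isIntegralType(t, includeBool)` predicate to a compile-time constant keyed on the C++ type, which is what lets the floor_divide/fmod lambdas above branch with zero runtime cost. A portable approximation of its behavior — the real trait routes through `CppTypeToScalarType`, so it also classifies ExecuTorch scalar types such as Half:

#include <type_traits>

template <typename T, bool includeBool>
struct is_integral_type_sketch
    : public std::integral_constant<
          bool,
          std::is_integral<T>::value &&
              (includeBool || !std::is_same<T, bool>::value)> {};

static_assert(is_integral_type_sketch<int, false>::value, "int is integral");
static_assert(!is_integral_type_sketch<float, true>::value, "float is not");
static_assert(
    !is_integral_type_sketch<bool, false>::value,
    "bool is excluded when includeBool is false");
static_assert(
    is_integral_type_sketch<bool, true>::value,
    "bool is included when includeBool is true");

int main() {}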