Use compile-time promotion to reduce remainder size & build time (#3458)

swolchok · facebook-github-bot · commit 5bed826aa55c · 2024-05-01T14:52:44.000-07:00
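Summary: Pull Request resolved: #3458. Yet another op that can benefit from compile-time type promotion: the promoted input type (CTYPE_IN) is now derived from CTYPE_A and CTYPE_B via promote_types at compile time instead of through an extra runtime ET_SWITCH on common_type, and output types that CTYPE_IN cannot be cast to no longer instantiate the element-wise kernel. Differential Revision: D56831293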
diff --git a/kernels/portable/cpu/op_remainder.cpp b/kernels/portable/cpu/op_remainder.cpp
@@ -20,6 +20,52 @@ namespace native {
 
 using Tensor = exec_aten::Tensor;
 
+namespace {
+template <
+    bool can_cast,
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct RemainderInner; // primary template, declared only; specialized on can_cast
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct RemainderInner<true, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT> { // can_cast == true case
+  static void run(const Tensor& a, const Tensor& b, Tensor& out) { // fills `out` from a, b
+    apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
+        // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue)
+        [](const CTYPE_A val_a, const CTYPE_B val_b) {
+          CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a); // convert lhs to CTYPE_IN
+          CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b); // convert rhs to CTYPE_IN
+          CTYPE_IN value = utils::remainder_override(a_casted, b_casted); // computed in CTYPE_IN
+
+          return static_cast<CTYPE_OUT>(value); // convert the result to the output element type
+        },
+        a,
+        b,
+        out);
+  }
+};
+
+struct ReportCanCastBug { // shared base for the can_cast == false specialization
+  static void run(const Tensor&, const Tensor&, Tensor&) { // ignores all three tensors
+    ET_DCHECK_MSG(false, "BUG: canCast should have been checked above"); // always-failing check
+  }
+};
+
+template <
+    typename CTYPE_A,
+    typename CTYPE_B,
+    typename CTYPE_IN,
+    typename CTYPE_OUT>
+struct RemainderInner<false, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT> // can_cast == false case
+    : public ReportCanCastBug {}; // inherits run(); no element-wise kernel is instantiated here
+
+} // namespace
 Tensor& remainder_Tensor_out(
     RuntimeContext& ctx,
     const Tensor& a,
@@ -45,32 +91,17 @@ Tensor& remainder_Tensor_out(
       Bool, a_type, ctx, "remainder.Tensor_out", CTYPE_A, [&]() {
         ET_SWITCH_REAL_TYPES_AND(
             Bool, b_type, ctx, "remainder.Tensor_out", CTYPE_B, [&]() {
+              using CTYPE_IN = typename torch::executor::
+                  promote_types<CTYPE_A, CTYPE_B>::type;
+              ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
               ET_SWITCH_REAL_TYPES(
-                  common_type, ctx, "remainder.Tensor_out", CTYPE_IN, [&]() {
-                    ET_SWITCH_REAL_TYPES(
-                        out_type,
-                        ctx,
-                        "remainder.Tensor_out",
-                        CTYPE_OUT,
-                        [&]() {
-                          apply_binary_elementwise_fn<
-                              CTYPE_A,
-                              CTYPE_B,
-                              CTYPE_OUT>(
-                              [](const CTYPE_A val_a, const CTYPE_B val_b) {
-                                CTYPE_IN a_casted =
-                                    static_cast<CTYPE_IN>(val_a);
-                                CTYPE_IN b_casted =
-                                    static_cast<CTYPE_IN>(val_b);
-                                CTYPE_IN value = utils::remainder_override(
-                                    a_casted, b_casted);
-
-                                return static_cast<CTYPE_OUT>(value);
-                              },
-                              a,
-                              b,
-                              out);
-                        });
+                  out_type, ctx, "remainder.Tensor_out", CTYPE_OUT, [&]() {
+                    RemainderInner<
+                        can_cast<CTYPE_IN, CTYPE_OUT>::value,
+                        CTYPE_A,
+                        CTYPE_B,
+                        CTYPE_IN,
+                        CTYPE_OUT>::run(a, b, out);
                   });
             });
       });
diff --git a/kernels/test/op_remainder_test.cpp b/kernels/test/op_remainder_test.cpp
@@ -21,6 +21,7 @@ using exec_aten::Tensor;
 using torch::executor::testing::TensorFactory;
 
 class OpRemainderOutTest : public OperatorTest {
+ protected:
   Tensor& op_remainder_tensor_out(
       const Tensor& self,
       const Tensor& other,
@@ -35,3 +36,16 @@ class OpRemainderOutTest : public OperatorTest {
     return torch::executor::aten::remainder_outf(context_, self, other, out);
   }
 };
+
+TEST_F(OpRemainderOutTest, SmokeTest) {
+  TensorFactory<ScalarType::Double> tfDouble;
+  TensorFactory<ScalarType::Long> tfLong;
+  TensorFactory<ScalarType::Int> tfInt;
+  // Mixed Long/Int inputs written to a Double output; 46 % 4 == 2.
+  Tensor self = tfLong.full({2, 2}, 46);
+  Tensor other = tfInt.full({2, 2}, 4);
+  Tensor out = tfDouble.zeros({2, 2});
+  Tensor out_expected = tfDouble.full({2, 2}, 2.0);
+  op_remainder_tensor_out(self, other, out);
+  EXPECT_TENSOR_CLOSE(out, out_expected);
+}