graph: backend: use post binary for float psrc and int dst

xinyu-intel · TaoLv · commit 9421fb2a453a · 2023-02-06T15:56:26.000+08:00
diff --git a/src/graph/backend/dnnl/fusion_info.cpp b/src/graph/backend/dnnl/fusion_info.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright 2022 Intel Corporation
+ * Copyright 2022-2023 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -146,20 +146,34 @@ dnnl::primitive_attr make_dnnl_primitive_attr(
                 // post-sum
                 float scale = pop->get_scale();
                 int32_t zp = pop->get_zp();
-                dnnl::memory::data_type sum_dt = dnnl::memory::data_type::undef;
-                if (op->get_kind() == op_kind::dnnl_convolution) {
-                    const auto psrc_dt = op->get_input_value(extra_inputs[0])
-                                                 ->get_logical_tensor()
-                                                 .data_type;
-                    const auto dst_dt = op->get_output_value(0)
-                                                ->get_logical_tensor()
-                                                .data_type;
+                const auto psrc_dt = op->get_input_value(extra_inputs[0])
+                                             ->get_logical_tensor()
+                                             .data_type;
+                const auto dst_dt = op->get_output_value(0)
+                                            ->get_logical_tensor()
+                                            .data_type;
+                // note that onednn doesn't support float post-sum with u8/s8
+                // dst. use post-binary for such case instead.
+                if (impl::utils::one_of(
+                            dst_dt, impl::data_type::u8, impl::data_type::s8)
+                        && impl::utils::one_of(psrc_dt, impl::data_type::f32,
+                                impl::data_type::bf16)
+                        && scale == 1.f && zp == 0) {
+                    auto input = op->get_input_value(extra_inputs[0]);
+                    auto md = make_dnnl_memory_desc(
+                            input->get_logical_tensor());
+                    dnnl_pops.append_binary(dnnl::algorithm::binary_add, md);
+                    op->remove_attr(op_attr::with_sum);
+                    pop->to_post_binary();
+                } else {
+                    dnnl::memory::data_type sum_dt
+                            = dnnl::memory::data_type::undef;
                     if (psrc_dt == impl::data_type::s8
                             && dst_dt == impl::data_type::u8) {
                         sum_dt = dnnl::memory::data_type::s8;
                     }
+                    dnnl_pops.append_sum(scale, zp, sum_dt);
                 }
-                dnnl_pops.append_sum(scale, zp, sum_dt);
             } else {
                 // post-binary
                 assertm(extra_inputs.size() == 1,
diff --git a/src/graph/backend/dnnl/fusion_info.hpp b/src/graph/backend/dnnl/fusion_info.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright 2022 Intel Corporation
+ * Copyright 2022-2023 Intel Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -84,6 +84,12 @@ class fusion_info_t {
             return op_->get_kind() == op_kind::dnnl_binary && !is_post_sum_;
         }
 
+        void to_post_binary() { // stop treating this post-op as post-sum
+            assertm(scale_ == 1.0f && zp_ == 0,
+                    "post binary cannot support scale and zp!");
+            is_post_sum_ = false; // only valid with unit scale and zero zp
+        }
+
     private:
         std::shared_ptr<op_t> op_;
         // used to represent post-eltwise and post-sum's scale