Skip to content

Commit 0013e8c

Browse files
rongzha1 authored and TaoLv committed
graph: backend: dnnl: fix bug for pattern lnorm + tc + mul + q
1 parent 2340f5a commit 0013e8c

File tree

3 files changed

+275
-0
lines changed

3 files changed

+275
-0
lines changed

src/graph/backend/dnnl/kernels/layernorm.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,11 @@ struct layernorm_fwd_t : public kernel_base_t {
9191
BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_typecast_to_predecessor);
9292
BACKEND_DNNL_ADD_PASS(pipeline, remove_quant_data_with_no_effect);
9393
BACKEND_DNNL_ADD_PASS(pipeline, replace_quant_data_with_binary_post_op);
94+
95+
// broadcast and swap should be before fuse_post_ops
96+
BACKEND_DNNL_ADD_PASS(pipeline, binary_canonicalization);
97+
BACKEND_DNNL_ADD_PASS(pipeline, binary_broadcast_swap);
98+
9499
BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_ops);
95100
BACKEND_DNNL_ADD_PASS(pipeline, convert_to_runtime_dst_scales);
96101
BACKEND_DNNL_ADD_PASS(pipeline, fuse_dst_scales);

tests/benchdnn/inputs/graph/pattern/harness_int8_all

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,5 +117,8 @@
117117
# layernorm with zp != 0
118118
--reset --op-attrs=2:zps:1 --case=pattern/int8/int8_lnorm_gelu_quantize.json
119119
--reset --case=pattern/int8/int8_lnorm_multiply_quantize.json
120+
--reset --case=pattern/int8/int8_lnorm_tc_multiply_quantize.json
121+
# layernorm with zp != 0 and broadcast binary
122+
--reset --op-attrs=3:zps:1 --in-shapes=5:512 --case=pattern/int8/int8_lnorm_tc_multiply_quantize.json
120123
#softmax
121124
--reset --case=pattern/int8/int8_softmax_add.json
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
{
2+
"version": "3.5.0",
3+
"engine_kind": "cpu",
4+
"fpmath_mode": "strict",
5+
"input_ports": [
6+
0,
7+
1,
8+
2,
9+
5
10+
],
11+
"output_ports": [
12+
7
13+
],
14+
"graph": [
15+
{
16+
"id": 0,
17+
"name": "layernorm",
18+
"kind": "LayerNorm",
19+
"attrs": {
20+
"begin_norm_axis": {
21+
"type": "s64",
22+
"value": -1
23+
},
24+
"use_affine": {
25+
"type": "bool",
26+
"value": 1
27+
},
28+
"keep_stats": {
29+
"type": "bool",
30+
"value": 0
31+
},
32+
"epsilon": {
33+
"type": "f32",
34+
"value": 0.0625
35+
}
36+
},
37+
"inputs": [
38+
{
39+
"id": 0,
40+
"dtype": "bf16",
41+
"shape": [
42+
1,
43+
128,
44+
512
45+
],
46+
"stride": [
47+
65536,
48+
512,
49+
1
50+
],
51+
"layout_type": "strided",
52+
"property_type": "undef"
53+
},
54+
{
55+
"id": 1,
56+
"dtype": "f32",
57+
"shape": [
58+
512
59+
],
60+
"stride": [
61+
1
62+
],
63+
"layout_type": "strided",
64+
"property_type": "undef"
65+
},
66+
{
67+
"id": 2,
68+
"dtype": "f32",
69+
"shape": [
70+
512
71+
],
72+
"stride": [
73+
1
74+
],
75+
"layout_type": "strided",
76+
"property_type": "undef"
77+
}
78+
],
79+
"outputs": [
80+
{
81+
"id": 3,
82+
"dtype": "bf16",
83+
"shape": [
84+
1,
85+
128,
86+
512
87+
],
88+
"stride": [
89+
65536,
90+
512,
91+
1
92+
],
93+
"layout_type": "strided",
94+
"property_type": "undef"
95+
}
96+
]
97+
},
98+
{
99+
"id": 1,
100+
"name": "typecast",
101+
"kind": "TypeCast",
102+
"attrs": {},
103+
"inputs": [
104+
{
105+
"id": 3,
106+
"dtype": "bf16",
107+
"shape": [
108+
1,
109+
128,
110+
512
111+
],
112+
"stride": [
113+
65536,
114+
512,
115+
1
116+
],
117+
"layout_type": "strided",
118+
"property_type": "undef"
119+
}
120+
],
121+
"outputs": [
122+
{
123+
"id": 4,
124+
"dtype": "f32",
125+
"shape": [
126+
1,
127+
128,
128+
512
129+
],
130+
"stride": [
131+
65536,
132+
512,
133+
1
134+
],
135+
"layout_type": "strided",
136+
"property_type": "undef"
137+
}
138+
]
139+
},
140+
{
141+
"id": 2,
142+
"name": "multiply",
143+
"kind": "Multiply",
144+
"attrs": {
145+
"auto_broadcast": {
146+
"type": "string",
147+
"value": "numpy"
148+
}
149+
},
150+
"inputs": [
151+
{
152+
"id": 4,
153+
"dtype": "f32",
154+
"shape": [
155+
1,
156+
128,
157+
512
158+
],
159+
"stride": [
160+
65536,
161+
512,
162+
1
163+
],
164+
"layout_type": "strided",
165+
"property_type": "undef"
166+
},
167+
{
168+
"id": 5,
169+
"dtype": "f32",
170+
"shape": [
171+
1,
172+
128,
173+
512
174+
],
175+
"stride": [
176+
65536,
177+
512,
178+
1
179+
],
180+
"layout_type": "strided",
181+
"property_type": "undef"
182+
}
183+
],
184+
"outputs": [
185+
{
186+
"id": 6,
187+
"dtype": "f32",
188+
"shape": [
189+
1,
190+
128,
191+
512
192+
],
193+
"stride": [
194+
65536,
195+
512,
196+
1
197+
],
198+
"layout_type": "strided",
199+
"property_type": "undef"
200+
}
201+
]
202+
},
203+
{
204+
"id": 3,
205+
"name": "quantize",
206+
"kind": "Quantize",
207+
"attrs": {
208+
"axis": {
209+
"type": "s64",
210+
"value": 0
211+
},
212+
"qtype": {
213+
"type": "string",
214+
"value": "per_tensor"
215+
},
216+
"scales": {
217+
"type": "f32[]",
218+
"value": [
219+
0.5
220+
]
221+
},
222+
"zps": {
223+
"type": "s64[]",
224+
"value": [
225+
0
226+
]
227+
}
228+
},
229+
"inputs": [
230+
{
231+
"id": 6,
232+
"dtype": "f32",
233+
"shape": [
234+
1,
235+
128,
236+
512
237+
],
238+
"stride": [
239+
65536,
240+
512,
241+
1
242+
],
243+
"layout_type": "strided",
244+
"property_type": "undef"
245+
}
246+
],
247+
"outputs": [
248+
{
249+
"id": 7,
250+
"dtype": "s8",
251+
"shape": [
252+
1,
253+
128,
254+
512
255+
],
256+
"stride": [
257+
65536,
258+
512,
259+
1
260+
],
261+
"layout_type": "strided",
262+
"property_type": "undef"
263+
}
264+
]
265+
}
266+
]
267+
}

0 commit comments

Comments
 (0)