Commit 59340c6

fix: check for rank of bias in bias-gelu fusion🐛 (#2393)
Follow-up to #2364. I noticed that the current `BiasGeluFusion` implementation from #2364 does not check the rank of the bias term, which can lead to runtime errors: the bias input of `BiasGelu(...)` is expected to be 1D (see [here](https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#commicrosoftbiasgelu)).

**Minimal, complete example**

With the current implementation:

```sh
uv pip install git+https://github.com/microsoft/onnxscript.git --force-reinstall
```

```python
import os

import numpy as np
import onnx_ir as ir
import torch
from onnxscript.rewriter.ort_fusions._core import fuse_xformers
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

import onnxruntime as ort

os.environ["TOKENIZERS_PARALLELISM"] = "false"

model_name = "hf-internal-testing/tiny-random-bart"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model.eval()


class EncoderWrapper(torch.nn.Module):
    """A wrapper around the BART encoder for onnx export."""

    def __init__(self, encoder: torch.nn.Module):
        super().__init__()
        self.encoder = encoder

    def forward(
        self, input_ids: torch.Tensor, attention_mask: torch.Tensor | None = None
    ) -> torch.Tensor:
        outs = self.encoder(input_ids, attention_mask)
        return outs["last_hidden_state"]


model = EncoderWrapper(encoder=model.model.encoder)
print(model)

text = "God bless the internet."
inputs = tokenizer(text, return_tensors="pt")
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]

input_names = ["input_ids"]
output_names = ["encoder_output"]
onnx_path = "bart_encoder.onnx"

torch.onnx.export(
    model,
    (input_ids,),
    onnx_path,
    export_params=True,
    input_names=input_names,
    output_names=output_names,
    dynamic_axes={
        "input_ids": {0: "batch_size", 1: "sequence_length"},
        "encoder_output": {0: "batch_size", 1: "sequence_length"},
    },
    opset_version=20,
)

onnx_model = ir.load(onnx_path)
onnx_model, stats = fuse_xformers(onnx_model)
print(stats)

optimized_path = "optimized_model.onnx"
ir.save(onnx_model, optimized_path)

sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
encoder_outs_original = sess.run(["encoder_output"], {"input_ids": input_ids.numpy()})

sess_optimized = ort.InferenceSession(optimized_path, providers=["CPUExecutionProvider"])
encoder_outs_optimized = sess_optimized.run(["encoder_output"], {"input_ids": input_ids.numpy()})

abs_diff = np.amax(np.abs(encoder_outs_original[0] - encoder_outs_optimized[0]))
print("abs_difference", abs_diff)
```

```
Applied 1 of general pattern rewrite rules.
{'erf_gelu': 0, 'rms_normalization': 0, 'skip_layer_normalization': 0, 'skip_rms_normalization': 0, 'rotary_embedding': 0, 'partial_rotary_embedding': 0, 'cos_sin_cache': 0, 'sdpa': 0, 'gqa': 0, 'packed_qkv_for_gqa': 0, 'mha1': 0, 'mha2': 0, 'mha_bias': 0, 'attention': 0, 'gelu': 0, 'bias_gelu': 2}
2025-06-15 20:52:33.994324 [W:onnxruntime:, graph.cc:118 MergeShapeInfo] Error merging shape info for output. '/encoder/layers.0/activation_fn/Gelu_output_0' source:{4} target:{-1,-1,4}. Falling back to lenient merge.
2025-06-15 20:52:33.994582 [W:onnxruntime:, graph.cc:118 MergeShapeInfo] Error merging shape info for output. '/encoder/layers.1/activation_fn/Gelu_output_0' source:{4} target:{-1,-1,4}. Falling back to lenient merge.
2025-06-15 20:52:34.007963 [W:onnxruntime:, graph.cc:118 MergeShapeInfo] Error merging shape info for output. '/encoder/layers.0/fc2/MatMul_output_0' source:{16} target:{-1,-1,16}. Falling back to lenient merge.
2025-06-15 20:52:34.008178 [W:onnxruntime:, graph.cc:118 MergeShapeInfo] Error merging shape info for output. '/encoder/layers.1/fc2/MatMul_output_0' source:{16} target:{-1,-1,16}. Falling back to lenient merge.
2025-06-15 20:52:34.008753 [W:onnxruntime:, graph.cc:118 MergeShapeInfo] Error merging shape info for output. '/encoder/layers.0/fc2/Add_output_0' source:{16} target:{-1,-1,16}. Falling back to lenient merge.
2025-06-15 20:52:34.008944 [W:onnxruntime:, graph.cc:118 MergeShapeInfo] Error merging shape info for output. '/encoder/layers.1/fc2/Add_output_0' source:{16} target:{-1,-1,16}. Falling back to lenient merge.
2025-06-15 20:52:34.018753 [E:onnxruntime:, sequential_executor.cc:572 ExecuteKernel] Non-zero status code returned while running BiasGelu node. Name:'node_BiasGelu_26' Status Message: Input 1 is expected to have 1 dimensions, got 3
...
onnxruntime.capi.onnxruntime_pybind11_state.InvalidArgument: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Non-zero status code returned while running BiasGelu node. Name:'node_BiasGelu_26' Status Message: Input 1 is expected to have 1 dimensions, got 3
```

With this fix:

```sh
uv pip install git+https://github.com/karelze/onnxscript.git@fix-bias-gelu-shape --force-reinstall
```

```
Applied 1 of general pattern rewrite rules.
{'erf_gelu': 0, 'rms_normalization': 0, 'skip_layer_normalization': 0, 'skip_rms_normalization': 0, 'rotary_embedding': 0, 'partial_rotary_embedding': 0, 'cos_sin_cache': 0, 'sdpa': 0, 'gqa': 0, 'packed_qkv_for_gqa': 0, 'mha1': 0, 'mha2': 0, 'mha_bias': 0, 'attention': 0, 'gelu': 0, 'bias_gelu': 2}
abs_difference 0.0
```

This PR adds:

- an additional check on the rank of the bias input
- additional test cases

Sorry for the inconvenience. @justinchuby @titaiwangms
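For context, the condition the fix enforces is that the `bias` value matched by the pattern has a known rank of exactly 1. Below is a minimal sketch of such a rank check against the `onnx_ir` API; it is a hypothetical stand-in for `onnxscript.rewriter._ir_utils.has_rank`, whose actual implementation may differ.

```python
import onnx_ir as ir


def has_rank(value: ir.Value | None, rank: int) -> bool:
    """Hypothetical stand-in for _ir_utils.has_rank, shown for illustration only."""
    if value is None:
        return False
    shape = value.shape  # an ir.Shape, or None when no shape information is available
    return shape is not None and len(shape) == rank
```

With this kind of check in `BiasGeluFusion.check`, a `Gelu(Add(x, y))` subgraph is only rewritten to `com.microsoft::BiasGelu` when the bias operand is a 1D tensor, matching the contract documented for the contrib op.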
1 parent b76e1b3 commit 59340c6

File tree

2 files changed (+34, -15 lines)

onnxscript/rewriter/ort_fusions/bias_gelu.py

Lines changed: 13 additions & 8 deletions
```diff
@@ -2,7 +2,7 @@
 # Licensed under the MIT License.
 from __future__ import annotations
 
-from onnxscript.rewriter import _fusion_utils, pattern
+from onnxscript.rewriter import _fusion_utils, _ir_utils, pattern
 
 
 class BiasGeluFusion(pattern.RewriteRuleClassBase):
@@ -22,30 +22,35 @@ def __init__(
         super().__init__(name)
         self._contrib_op = contrib_op
 
-    def pattern(self, op, x, y):
-        gelu_add = op.Add(x, y)
+    def pattern(self, op, input, bias):
+        gelu_add = op.Add(input, bias)
+
         if self._contrib_op:
             return op.Gelu(gelu_add, _domain="com.microsoft", _outputs=["gelu"])
         else:
             return op.Gelu(gelu_add, _outputs=["gelu"])
 
-    def check(self, op, gelu, **_) -> pattern.MatchResult:
+    def check(self, op, gelu, input, bias, **_) -> pattern.MatchResult:
         check_result = pattern.MatchResult()
         approximate = gelu.producer().attributes.get_string("approximate")
         if approximate is not None and approximate == "tanh":
             return check_result.fail(
                 "Gelu operator with 'approximate' set to 'tanh' is not supported."
             )
+
+        if not _ir_utils.has_rank(bias, 1):
+            return check_result.fail("bias is not of shape 1D tensor", bias)
+
         return check_result
 
-    def rewrite(self, op, x, y, **_):
-        return op.BiasGelu(x, y, _domain="com.microsoft")
+    def rewrite(self, op, input, bias, **_):
+        return op.BiasGelu(input, bias, _domain="com.microsoft")
 
 
 bias_gelu_rules = pattern.RewriteRuleSet(
     [
-        BiasGeluFusion.rule("gelu_onnx_op", contrib_op=False),
-        BiasGeluFusion.rule("gelu_contrib_op", contrib_op=True),
+        *BiasGeluFusion.rule("gelu_onnx_op", contrib_op=False).commute(),
+        *BiasGeluFusion.rule("gelu_contrib_op", contrib_op=True).commute(),
     ]
 )
```
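As a usage sketch, the updated rule set can also be applied on its own, without going through `fuse_xformers`. This assumes `RewriteRuleSet.apply_to_model` returns the number of applied rewrites; treat it as an illustration rather than the canonical API.

```python
import onnx_ir as ir

from onnxscript.rewriter.ort_fusions.bias_gelu import bias_gelu_rules

# Load the exported encoder and apply only the bias-gelu fusion rules.
model = ir.load("bart_encoder.onnx")
count = bias_gelu_rules.apply_to_model(model)  # assumed to return the rewrite count
print("bias-gelu rewrites applied:", count)
ir.save(model, "bart_encoder_bias_gelu.onnx")
```

Because the rules are now registered with `.commute()`, both `Add(input, bias)` and `Add(bias, input)` orderings are matched, which the new `reversed_order` test below exercises.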

onnxscript/rewriter/ort_fusions/bias_gelu_test.py

Lines changed: 21 additions & 7 deletions
```diff
@@ -18,27 +18,39 @@
 
 
 @script()
-def _test_script_onnx_default(x: FLOAT[10], y: FLOAT[10]) -> FLOAT[10]:
+def _test_script_onnx_default(x: FLOAT[10, 10], y: FLOAT[10]) -> FLOAT[10]:
     gelu_add = op.Add(x, y)
     return op.Gelu(gelu_add)
 
 
 @script()
-def _test_script_onnx_none(x: FLOAT[10], y: FLOAT[10]) -> FLOAT[10]:
+def _test_script_onnx_none(x: FLOAT[10, 10], y: FLOAT[10]) -> FLOAT[10]:
     gelu_add = op.Add(x, y)
     return op.Gelu(gelu_add, approximate="none")
 
 
 @script()
-def _test_script_onnx_unsupported(x: FLOAT[10], y: FLOAT[10]) -> FLOAT[10]:
+def _test_script_msft_op(x: FLOAT[10, 10], y: FLOAT[10]) -> FLOAT[10]:
     gelu_add = op.Add(x, y)
-    return op.Gelu(gelu_add, approximate="tanh")
+    return msft_op.Gelu(gelu_add)
+
+
+@script()
+def _test_script_reversed_order(x: FLOAT[10, 10], y: FLOAT[10]) -> FLOAT[10]:
+    gelu_add = op.Add(y, x)
+    return op.Gelu(gelu_add)
 
 
 @script()
-def _test_script_msft_op(x: FLOAT[10], y: FLOAT[10]) -> FLOAT[10]:
+def _test_script_onnx_unsupported(x: FLOAT[10, 10], y: FLOAT[10]) -> FLOAT[10]:
     gelu_add = op.Add(x, y)
-    return msft_op.Gelu(gelu_add)
+    return op.Gelu(gelu_add, approximate="tanh")
+
+
+@script()
+def _test_script_shape_unsupported(x: FLOAT[10, 10], y: FLOAT[10]) -> FLOAT[10]:
+    gelu_add = op.Add(x, x)
+    return op.Gelu(gelu_add)
 
 
 class BiasGeluFusionTest(unittest.TestCase):
@@ -54,7 +66,7 @@ def _check(
         optimize(model)
 
         input = {
-            "x": np.random.randn(10).astype(np.float32),
+            "x": np.random.randn(10, 10).astype(np.float32),
            "y": np.random.randn(10).astype(np.float32),
         }
         original_output = test_utils.ort_run("Original", model, input)
@@ -73,6 +85,7 @@ def _check(
             ("with_onnx_op_default", _test_script_onnx_default, 1, "BiasGelu"),
             ("with_onnx_op_none", _test_script_onnx_none, 1, "BiasGelu"),
             ("with_contrib_op", _test_script_msft_op, 1, "BiasGelu"),
+            ("reversed_order", _test_script_reversed_order, 1, "BiasGelu"),
         ]
     )
     def test_bias_gelu_fusion(
@@ -87,6 +100,7 @@ def test_bias_gelu_fusion(
     @parameterized.parameterized.expand(
         [
             ("approximate_tanh", _test_script_onnx_unsupported, 2, "Add"),
+            ("unsupported_shape", _test_script_shape_unsupported, 2, "Add"),
         ]
     )
     def test_bias_gelu_fusion_unsupported_attr(
```
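To see the new shape guard outside the test harness, here is a rough sketch that builds a `Gelu(Add(x, b))` graph whose second `Add` operand is 2D and applies the rule set; with the rank check in place it should report zero rewrites. `ir.from_proto` and `apply_to_model` are assumed APIs here, used purely for illustration.

```python
import onnx_ir as ir
from onnx import TensorProto, helper

from onnxscript.rewriter.ort_fusions.bias_gelu import bias_gelu_rules

# Gelu(Add(x, b)) where b is 2D: BiasGelu requires a 1D bias, so no rewrite should fire.
graph = helper.make_graph(
    nodes=[
        helper.make_node("Add", ["x", "b"], ["added"]),
        helper.make_node("Gelu", ["added"], ["out"]),
    ],
    name="gelu_add_2d_bias",
    inputs=[
        helper.make_tensor_value_info("x", TensorProto.FLOAT, [10, 10]),
        helper.make_tensor_value_info("b", TensorProto.FLOAT, [10, 10]),  # not 1D
    ],
    outputs=[helper.make_tensor_value_info("out", TensorProto.FLOAT, [10, 10])],
)
model_proto = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 20)])
model = ir.from_proto(model_proto)

applied = bias_gelu_rules.apply_to_model(model)
print("rewrites applied:", applied)  # expected: 0, because neither Add operand is 1D
```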
