[IR] Create a shape inference pass using onnx shape inference (microsoft#2117)

justinchuby · bmehta001 · commit f90c2d8985aa · 2025-04-11T17:20:31.000Z
The pass handles large models by temporarily removing large initializer
tensors before sending the model to ONNX shape inference.
diff --git a/onnxscript/ir/_io.py b/onnxscript/ir/_io.py
@@ -78,7 +78,7 @@ def save(
 
         # Store the original initializer values so they can be restored if modify_model=False
         initializer_values = tuple(model.graph.initializers.values())
-        tensors = [v.const_value for v in model.graph.initializers.values()]
+        tensors = [v.const_value for v in initializer_values]
 
         try:
             model = _external_data.unload_from_model(
diff --git a/onnxscript/ir/passes/_pass_infra.py b/onnxscript/ir/passes/_pass_infra.py
@@ -58,7 +58,7 @@ class PassResult:
 
     Attributes:
         model: The transformed model.
-        modified: Whether the model was modified.
+        modified: Whether the resulting model is different from the input model.
     """
 
     model: ir.Model
diff --git a/onnxscript/ir/passes/common/shape_inference.py b/onnxscript/ir/passes/common/shape_inference.py
@@ -0,0 +1,138 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+"""Shape inference pass using onnx.shape_inference."""
+
+from __future__ import annotations
+
+# Names exported as the public API of this module.
+__all__ = [
+    "ShapeInferencePass",
+    "infer_shapes",
+]
+
+import logging
+
+import onnx
+
+from onnxscript import ir
+
+# Module-level logger; used to report shape inference failures.
+logger = logging.getLogger(__name__)
+
+# Threshold in bytes (it is compared against ``const_value.nbytes``).
+# Temporarily remove initializers larger than this size to keep model size down
+# for the onnx.shape_inference call because it needs to serialize the model
+_BIG_TENSOR_SIZE_LIMIT = 1000  # 1KB
+
+
+class ShapeInferencePass(ir.passes.PassBase):
+    """Run ``onnx.shape_inference.infer_shapes`` on a model.
+
+    The model is serialized to a proto, passed through ONNX shape inference and
+    deserialized into a new model, which is what the pass returns on success.
+
+    To limit the size of the serialized proto, every initializer is temporarily
+    added to the graph inputs, and initializers whose tensors are larger than
+    ``_BIG_TENSOR_SIZE_LIMIT`` bytes are detached while inference runs. The
+    original tensors are re-attached to the input model and to the returned
+    model afterwards.
+    """
+
+    # This pass does not modify the model in place.
+    in_place = False
+
+    def __init__(
+        self, check_type: bool = True, strict_mode: bool = True, data_prop: bool = True
+    ) -> None:
+        """Initialize the shape inference pass.
+
+        The three options are stored on the instance and forwarded unchanged to
+        ``onnx.shape_inference.infer_shapes`` when the pass is called.
+
+        Args:
+            check_type: If True, check the types of the inputs and outputs.
+            strict_mode: If True, use strict mode for shape inference.
+            data_prop: If True, use data propagation for shape inference.
+        """
+        super().__init__()
+        self.check_type = check_type
+        self.strict_mode = strict_mode
+        self.data_prop = data_prop
+
+    def call(self, model: ir.Model) -> ir.passes.PassResult:
+        """Infer shapes for ``model`` and return the result as a new model.
+
+        Args:
+            model: The model to run shape inference on. Its initializers and
+                inputs are modified temporarily and restored before returning.
+
+        Returns:
+            A ``PassResult`` holding the inferred model, with ``modified`` set
+            to True when the ``value_info`` entries of the serialized proto
+            differ before and after inference. If serialization, inference or
+            deserialization raises, a warning is logged and the input model is
+            returned with ``modified=False``.
+        """
+        # Store the original initializer values so they can be restored
+        initializer_values = tuple(model.graph.initializers.values())
+        # Snapshot the tensors by name before any const_value is cleared below
+        tensors = {v.name: v.const_value for v in initializer_values}
+        # Initializers appended to the inputs below land after this index
+        original_inputs_len = len(model.graph.inputs)
+        initializer_names = {v.name for v in initializer_values}
+
+        # Turn the initializers into inputs and clear the initializers
+        # to limit the model size
+        for initializer in initializer_values:
+            # Make sure the initializer has its shape/type set
+            # NOTE(review): shape/dtype filled in here are not reverted by the
+            # ``finally`` block below, so they stay set on the original model.
+            assert initializer.const_value is not None
+            if initializer.shape is None:
+                initializer.shape = initializer.const_value.shape  # type: ignore[assignment]
+            if initializer.dtype is None:
+                initializer.dtype = initializer.const_value.dtype
+            # Expose the initializer as a graph input unless it already is one
+            if initializer not in model.graph.inputs:
+                model.graph.inputs.append(initializer)
+            if initializer.const_value.nbytes > _BIG_TENSOR_SIZE_LIMIT:
+                # Temporarily remove the initializer value to reduce model size
+                # for onnx.shape_inference
+                initializer.const_value = None
+                assert initializer.name is not None
+                model.graph.initializers.pop(initializer.name)
+
+        # Perform shape inference
+        try:
+            proto = ir.serde.serialize_model(model)
+            # value_info before inference; compared with the inferred one to
+            # decide whether the pass changed anything
+            value_infos = {info.name: info for info in proto.graph.value_info}
+            inferred_proto = onnx.shape_inference.infer_shapes(
+                proto,
+                check_type=self.check_type,
+                strict_mode=self.strict_mode,
+                data_prop=self.data_prop,
+            )
+            inferred_value_infos = {
+                info.name: info for info in inferred_proto.graph.value_info
+            }
+            inferred_model = ir.serde.deserialize_model(inferred_proto)
+
+        except Exception:  # pylint: disable=broad-exception-caught
+            # Best effort: report the failure and hand back the input model.
+            # The ``finally`` block still runs before this return takes effect.
+            logger.warning("Shape inference failed. The model is not modified", exc_info=True)
+            return ir.passes.PassResult(model, modified=False)
+        finally:
+            # Restore the original initializer values so the model is unchanged
+            for initializer in initializer_values:
+                # NOTE(review): this check is always true, because
+                # initializer_names was built from initializer_values.
+                if initializer.name in initializer_names:
+                    initializer.const_value = tensors[initializer.name]
+                    model.graph.register_initializer(initializer)
+
+            # Restore the original inputs
+            # (initializers were appended, so truncating to the original
+            # length drops exactly the ones added above)
+            inputs = model.graph.inputs[:original_inputs_len]
+            model.graph.inputs.clear()
+            model.graph.inputs.extend(inputs)
+
+        # Add the original initializer tensors to the new (inferred) model
+        for new_input in inferred_model.graph.inputs:
+            # Assign the tensors back to the initializers
+            if new_input.name in initializer_names:
+                new_input.const_value = tensors[new_input.name]
+                inferred_model.graph.register_initializer(new_input)
+
+        # Remove the inputs that were added
+        # NOTE(review): assumes the serialize/deserialize round trip keeps the
+        # input order, so the added inputs are at the tail -- confirm.
+        new_inputs = inferred_model.graph.inputs[:original_inputs_len]
+        inferred_model.graph.inputs.clear()
+        inferred_model.graph.inputs.extend(new_inputs)
+
+        return ir.passes.PassResult(
+            inferred_model, modified=value_infos != inferred_value_infos
+        )
+
+
+def infer_shapes(
+    model: ir.Model,
+    *,
+    check_type: bool = True,
+    strict_mode: bool = True,
+    data_prop: bool = True,
+) -> ir.Model:
+    """Run shape inference on a model and return the resulting model.
+
+    Convenience wrapper that builds a ``ShapeInferencePass`` with the given
+    options, applies it to ``model`` and unwraps the model from the result.
+
+    Args:
+        model: The model to perform shape inference on.
+        check_type: If True, check the types of the inputs and outputs.
+        strict_mode: If True, use strict mode for shape inference.
+        data_prop: If True, use data propagation for shape inference.
+
+    Returns:
+        The model produced by the shape inference pass.
+    """
+    shape_inference_pass = ShapeInferencePass(
+        check_type=check_type,
+        strict_mode=strict_mode,
+        data_prop=data_prop,
+    )
+    pass_result = shape_inference_pass(model)
+    return pass_result.model
diff --git a/onnxscript/ir/passes/common/shape_inference_test.py b/onnxscript/ir/passes/common/shape_inference_test.py
@@ -0,0 +1,148 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+from __future__ import annotations
+
+import unittest
+
+import numpy as np
+
+from onnxscript import ir
+from onnxscript.ir.passes.common import shape_inference
+
+
+class TestShapeInferencePass(unittest.TestCase):
+    """Tests for ``shape_inference.ShapeInferencePass``."""
+
+    def test_pass(self):
+        """Shape and dtype are inferred for the output of a single Add node."""
+        # Create a simple ONNX model with shape inference
+        # Define the model
+        inputs = [
+            ir.Value(
+                name="input_a", type=ir.TensorType(ir.DataType.FLOAT), shape=ir.Shape((1, 2))
+            ),
+            ir.Value(
+                name="input_b", type=ir.TensorType(ir.DataType.FLOAT), shape=ir.Shape((1, 2))
+            ),
+        ]
+
+        add_node = ir.Node("", "Add", inputs=inputs)
+
+        model = ir.Model(
+            ir.Graph(
+                inputs=inputs,
+                outputs=add_node.outputs,
+                nodes=[add_node],
+                opset_imports={"": 20},
+            ),
+            ir_version=10,
+        )
+        # Nothing is known about the Add output before the pass runs
+        self.assertIsNone(add_node.outputs[0].shape)
+        self.assertIsNone(add_node.outputs[0].dtype)
+
+        # Perform shape inference
+        result = shape_inference.ShapeInferencePass()(model)
+        self.assertTrue(result.modified)
+        self.assertEqual(result.model.graph.node(0).outputs[0].shape, ir.Shape((1, 2)))
+        self.assertEqual(result.model.graph.node(0).outputs[0].dtype, ir.DataType.FLOAT)
+        self.assertEqual(result.model.graph.outputs[0].shape, ir.Shape((1, 2)))
+        self.assertEqual(result.model.graph.outputs[0].dtype, ir.DataType.FLOAT)
+
+    def test_pass_with_initializers(self):
+        """Initializers survive the pass and the input model is left intact."""
+        # _BIG_TENSOR_SIZE_LIMIT is a byte count, while big_dim is used as an
+        # element count. That is fine: the tensor only needs to be big enough
+        # for its byte size to exceed _BIG_TENSOR_SIZE_LIMIT
+        big_dim = shape_inference._BIG_TENSOR_SIZE_LIMIT * 2  # pylint: disable=protected-access
+        inputs = [
+            ir.Value(
+                name="input_a", type=ir.TensorType(ir.DataType.FLOAT), shape=ir.Shape((1, 2))
+            ),
+            ir.Value(
+                name="input_b",
+                type=ir.TensorType(ir.DataType.FLOAT),
+                shape=ir.Shape((big_dim, 1)),
+                const_value=ir.tensor([[42]] * big_dim, dtype=ir.DataType.FLOAT),
+            ),
+        ]
+
+        # Shape and type are not explicitly set for the initializer but it should still work
+        initializer = ir.Value(
+            name="initializer", const_value=ir.tensor([[2, 3]], dtype=ir.DataType.FLOAT)
+        )
+
+        add_node = ir.Node("", "Add", inputs=[*inputs])
+        mul_node = ir.Node("", "Mul", inputs=[add_node.outputs[0], initializer])
+
+        model = ir.Model(
+            graph := ir.Graph(
+                inputs=inputs,
+                outputs=mul_node.outputs,
+                nodes=[add_node, mul_node],
+                opset_imports={"": 20},
+            ),
+            ir_version=10,
+        )
+        # input_b is both a graph input and an initializer; "initializer" is
+        # an initializer only
+        graph.register_initializer(inputs[1])
+        graph.register_initializer(initializer)
+
+        self.assertIsNone(add_node.outputs[0].shape)
+        self.assertIsNone(add_node.outputs[0].dtype)
+        self.assertIsNone(mul_node.outputs[0].shape)
+        self.assertIsNone(mul_node.outputs[0].dtype)
+        self.assertIsNone(initializer.shape)
+        self.assertIsNone(initializer.dtype)
+
+        # Perform shape inference
+        result = shape_inference.ShapeInferencePass()(model)
+        self.assertTrue(result.modified)
+        self.assertEqual(result.model.graph.node(0).outputs[0].shape, ir.Shape((big_dim, 2)))
+        self.assertEqual(result.model.graph.node(0).outputs[0].dtype, ir.DataType.FLOAT)
+        self.assertEqual(result.model.graph.node(1).outputs[0].shape, ir.Shape((big_dim, 2)))
+        self.assertEqual(result.model.graph.node(1).outputs[0].dtype, ir.DataType.FLOAT)
+        self.assertEqual(
+            result.model.graph.initializers["initializer"].shape, ir.Shape((1, 2))
+        )
+        self.assertEqual(
+            result.model.graph.initializers["initializer"].dtype, ir.DataType.FLOAT
+        )
+        self.assertEqual(result.model.graph.outputs[0].shape, ir.Shape((big_dim, 2)))
+        self.assertEqual(result.model.graph.outputs[0].dtype, ir.DataType.FLOAT)
+
+        # Check that the initializer correctly appears in the result
+        self.assertEqual(len(result.model.graph.inputs), 2)
+        self.assertEqual(len(result.model.graph.initializers), 2)
+        np.testing.assert_array_equal(
+            result.model.graph.initializers["input_b"].const_value.numpy(),
+            np.array([[42]] * big_dim, dtype=np.float32),
+            strict=True,
+        )
+        self.assertEqual(
+            result.model.graph.initializers["input_b"].const_value.dtype,
+            ir.DataType.FLOAT,
+        )
+        np.testing.assert_array_equal(
+            result.model.graph.initializers["initializer"].const_value.numpy(),
+            np.array([[2.0, 3.0]], dtype=np.float32),
+            strict=True,
+        )
+        self.assertEqual(
+            result.model.graph.initializers["initializer"].const_value.dtype,
+            ir.DataType.FLOAT,
+        )
+
+        # Check that the original model is not modified
+        self.assertIsNone(add_node.outputs[0].shape)
+        self.assertIsNone(add_node.outputs[0].dtype)
+        self.assertIsNone(mul_node.outputs[0].shape)
+        self.assertIsNone(mul_node.outputs[0].dtype)
+        self.assertEqual(len(model.graph.inputs), 2)
+        self.assertEqual(len(model.graph.initializers), 2)
+        self.assertIs(model.graph.initializers["input_b"].const_value, inputs[1].const_value)
+        self.assertEqual(len(model.graph.outputs), 1)
+        # Use assertIsNone for consistency with the other None checks above
+        self.assertIsNone(model.graph.outputs[0].shape)
+        self.assertIsNone(model.graph.outputs[0].dtype)
+        # Check that the initializer is not modified
+        self.assertIs(
+            model.graph.initializers["initializer"].const_value, initializer.const_value
+        )
+
+
+# Allow running this test module directly as a script.
+if __name__ == "__main__":
+    unittest.main()