Add deduplication pass for initializer tensors (#67)

AbhishekHerbertSamuel · justinchuby · web-flow · commit d8fa01187999 · 2025-06-19T13:45:14.000-07:00
### Summary

This PR adds a new graph transformation pass: `DeduplicateInitializersPass`. It removes duplicate initializer tensors (typically model weights) based on a unique fingerprint derived from:

- Tensor byte content (`tobytes()`)
- Data type (`dtype`)
- Shape

All redundant initializers are removed, and nodes referencing them are updated to use the canonical (first-seen) tensor.

---

### Implementation Details

- Fingerprints are tracked using a dictionary: `(tobytes, dtype, shape) → name`
- Redundant initializers are removed using `graph.initializers.pop(...)`
- Node inputs are updated via `node.replace_input_with(...)` for correctness and safety

---

### Benefits

- Reduces memory and file size by eliminating duplicated weight tensors
- Simplifies graph structure for downstream optimization and export

---

### File Added

- `src/onnx_ir/passes/common/deduplicate_initializers.py`

### Closes

Closes #66

---------

Signed-off-by: Abhishek Herbert Samuel <abhishekherbertsamuel@gmail.com>
Signed-off-by: Justin Chu <justinchuby@users.noreply.github.com>
Co-authored-by: Justin Chu <justinchuby@users.noreply.github.com>
diff --git a/src/onnx_ir/passes/common/initializer_deduplication.py b/src/onnx_ir/passes/common/initializer_deduplication.py
@@ -0,0 +1,54 @@
+# Copyright (c) ONNX Project Contributors
+# SPDX-License-Identifier: Apache-2.0
+"""Pass for removing duplicated initializer tensors from a graph."""
+
+from __future__ import annotations
+
+# Public names exported by this module.
+__all__ = [
+    "DeduplicateInitializersPass",
+]
+
+
+import onnx_ir as ir
+
+
+class DeduplicateInitializersPass(ir.passes.InPlacePass):
+    """Remove duplicated initializer tensors from the main graph.
+
+    This pass detects initializers with identical dtype, shape, and content,
+    redirects every use of a duplicate to the first-seen (canonical) initializer,
+    and then removes the duplicate from ``graph.initializers``.
+
+    An initializer is left untouched when any of the following holds:
+
+    - It has no ``const_value``.
+    - Its ``const_value.size`` is greater than ``size_limit``.
+    - It is also listed in ``graph.inputs`` or ``graph.outputs``. This pass only
+      rewires consumers via ``replace_all_uses_with`` and does not edit the graph
+      signature itself, so merging or removing such a value would leave the
+      signature referring to a value that is no longer an initializer.
+
+    Only ``model.graph`` is processed.
+    To deduplicate initializers from subgraphs, use :class:`~onnx_ir.passes.common.LiftSubgraphInitializersToMainGraphPass`
+    to lift the initializers to the main graph first before running this pass.
+
+    Args:
+        size_limit: Upper bound compared against ``const_value.size``; initializers
+            above it are skipped so that the bytes of large tensors are never
+            materialized just to build a lookup key.
+    """
+
+    def __init__(self, size_limit: int = 1024):
+        """Store ``size_limit`` for use by :meth:`call`."""
+        super().__init__()
+        self.size_limit = size_limit
+
+    def call(self, model: ir.Model) -> ir.passes.PassResult:
+        """Deduplicate the initializers of ``model.graph`` in place.
+
+        Args:
+            model: The model whose main graph is rewritten.
+
+        Returns:
+            A ``PassResult`` wrapping the same ``model`` object; ``modified`` is
+            True only if at least one duplicate initializer was removed.
+        """
+        graph = model.graph
+        # Identity (not equality) set of values that are part of the graph signature.
+        # These must neither be removed nor used as a replacement target.
+        signature_value_ids = {id(value) for value in (*graph.inputs, *graph.outputs)}
+        initializers: dict[tuple[ir.DataType, tuple[int, ...], bytes], ir.Value] = {}
+        modified = False
+
+        # Iterate over a snapshot: duplicates are popped from the mapping inside the loop.
+        for initializer in tuple(graph.initializers.values()):
+            # TODO(justinchuby): Handle subgraphs as well. For now users can lift initializers
+            # out of subgraphs into the main graph before running this pass.
+            if id(initializer) in signature_value_ids:
+                # Skip initializers that double as graph inputs or outputs
+                continue
+
+            const_val = initializer.const_value
+            if const_val is None:
+                # Skip if initializer has no constant value
+                continue
+
+            if const_val.size > self.size_limit:
+                # Skip large tensors to avoid serializing them for the key
+                continue
+
+            # dtype and shape are part of the key so that tensors whose raw bytes
+            # happen to match but whose interpretation differs are kept apart.
+            key = (const_val.dtype, tuple(const_val.shape), const_val.tobytes())
+            if key in initializers:
+                modified = True
+                ir.convenience.replace_all_uses_with(initializer, initializers[key])  # type: ignore[index]
+                assert initializer.name is not None
+                graph.initializers.pop(initializer.name)
+            else:
+                # First occurrence becomes the canonical value for this key
+                initializers[key] = initializer  # type: ignore[index]
+
+        return ir.passes.PassResult(model=model, modified=modified)
diff --git a/src/onnx_ir/passes/common/initializer_deduplication_test.py b/src/onnx_ir/passes/common/initializer_deduplication_test.py
@@ -0,0 +1,100 @@
+# Copyright (c) ONNX Project Contributors
+# SPDX-License-Identifier: Apache-2.0
+"""Unit tests for the initializer_deduplication passes."""
+
+import unittest
+
+import onnx_ir as ir
+from onnx_ir.passes.common import initializer_deduplication
+
+
+class DeduplicateInitializersTest(unittest.TestCase):
+    """Exercise ``DeduplicateInitializersPass`` on small graphs given in ONNX text form."""
+
+    def apply_pass(self, model: ir.Model) -> ir.Model:
+        """Run the pass with default settings and return the model from its result."""
+        dedup_pass = initializer_deduplication.DeduplicateInitializersPass()
+        return dedup_pass(model).model
+
+    def _count_initializers_after_pass(self, model_text: str) -> int:
+        """Parse ``model_text``, apply the pass, and return the number of initializers left."""
+        parsed = ir.from_onnx_text(model_text)
+        processed = self.apply_pass(parsed)
+        return len(processed.graph.initializers)
+
+    def test_deduplicates_identical_initializers(self):
+        """Identical initializers are merged and both node inputs end up equal."""
+        original = ir.from_onnx_text(
+            """
+            <ir_version: 10, opset_import: ["" : 17]>
+            agraph () => ()
+            <float[3] w1 = {1.0, 2.0, 3.0}, float[3] w2 = {1.0, 2.0, 3.0}> {
+                sum = Add(w1, w2)
+            }
+            """
+        )
+        self.assertEqual(len(original.graph.initializers), 2)
+        deduped = self.apply_pass(original)
+        self.assertEqual(len(deduped.graph.initializers), 1)
+        node_inputs = deduped.graph[0].inputs
+        self.assertEqual(node_inputs[0], node_inputs[1])
+
+    def test_initializers_with_different_shapes_not_deduplicated(self):
+        """Same values with different shapes stay separate."""
+        remaining = self._count_initializers_after_pass(
+            """
+            <ir_version: 10, opset_import: ["" : 17]>
+            agraph () => ()
+            <float[2] w1 = {1.0, 2.0}, float[1,2] w2 = {1.0, 2.0}> {
+                sum = Add(w1, w2)
+            }
+            """
+        )
+        self.assertEqual(remaining, 2)
+
+    def test_initializers_with_different_dtypes_not_deduplicated(self):
+        """Same values with different dtypes stay separate."""
+        remaining = self._count_initializers_after_pass(
+            """
+            <ir_version: 10, opset_import: ["" : 17]>
+            agraph () => ()
+            <float[2] w1 = {1.0, 2.0}, double[2] w2 = {1.0, 2.0}> {
+                sum = Add(w1, w2)
+            }
+            """
+        )
+        self.assertEqual(remaining, 2)
+
+    def test_scalar_initializer_deduplication(self):
+        """Identical scalar initializers are merged into one."""
+        remaining = self._count_initializers_after_pass(
+            """
+            <ir_version: 10, opset_import: ["" : 17]>
+            agraph () => ()
+            <float w1 = {5.0}, float w2 = {5.0}> {
+                sum = Add(w1, w2)
+            }
+            """
+        )
+        self.assertEqual(remaining, 1)
+
+    def test_multiple_duplicates(self):
+        """Three identical initializers collapse into a single one."""
+        remaining = self._count_initializers_after_pass(
+            """
+            <ir_version: 10, opset_import: ["" : 17]>
+            agraph () => ()
+            <float[2] w1 = {1.0, 1.0}, float[2] w2 = {1.0, 1.0}, float[2] w3 = {1.0, 1.0}> {
+                temp = Add(w1, w2)
+                out = Add(temp, w3)
+            }
+            """
+        )
+        self.assertEqual(remaining, 1)
+
+    def test_unique_values_not_deduplicated(self):
+        """Initializers whose contents differ are both kept."""
+        remaining = self._count_initializers_after_pass(
+            """
+            <ir_version: 10, opset_import: ["" : 17]>
+            agraph () => ()
+            <float[2] w1 = {1.0, 2.0}, float[2] w2 = {2.0, 1.0}> {
+                sum = Add(w1, w2)
+            }
+            """
+        )
+        self.assertEqual(remaining, 2)
+
+
+# Run the tests when this file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()