From 2bfcf5371d36cd31b28a302e3afa8d9399be78c2 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Wed, 18 Jun 2025 20:10:40 +0000 Subject: [PATCH 1/3] replace value.nbytes with value.size --- onnxscript/ir/passes/common/_c_api_utils.py | 2 +- onnxscript/optimizer/_constant_folding.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/onnxscript/ir/passes/common/_c_api_utils.py b/onnxscript/ir/passes/common/_c_api_utils.py index bb2715c75c..c2d11a4d2a 100644 --- a/onnxscript/ir/passes/common/_c_api_utils.py +++ b/onnxscript/ir/passes/common/_c_api_utils.py @@ -51,7 +51,7 @@ def call_onnx_api(func: Callable[[onnx.ModelProto], _R], model: ir.Model) -> _R: initializer.dtype = initializer.const_value.dtype if initializer not in model.graph.inputs: model.graph.inputs.append(initializer) - if initializer.const_value.nbytes > _BIG_TENSOR_SIZE_LIMIT: + if initializer.size > _BIG_TENSOR_SIZE_LIMIT: # Temporarily remove the initializer value to reduce model size # for onnx.shape_inference initializer.const_value = None diff --git a/onnxscript/optimizer/_constant_folding.py b/onnxscript/optimizer/_constant_folding.py index 1c6a10a2c0..6da9063620 100644 --- a/onnxscript/optimizer/_constant_folding.py +++ b/onnxscript/optimizer/_constant_folding.py @@ -944,7 +944,7 @@ def new_constant(self, node: ir.Node, value) -> ir.Node | None: tensor.name = irvalue.name irvalue.const_value = tensor - if value.nbytes > self.output_size_limit: + if value.size > self.output_size_limit: # Handle examples like Transpose(weight) to be folded even if the size is large, # as long as weight has no other uses. This won't increase model size. 
removed_input_size = 0 @@ -952,13 +952,13 @@ def new_constant(self, node: ir.Node, value) -> ir.Node | None: if (input is not None) and (len(input.uses()) == 1): array = _get_numpy_value(input) if array is not None: - removed_input_size += array.nbytes - increased_size = value.nbytes - removed_input_size + removed_input_size += array.size + increased_size = value.size - removed_input_size if increased_size > 0: logger.info( "Skip storing constant folded nvalue %s due to large size %s.", irvalue.name, - value.nbytes, + value.size, ) return None @@ -1031,7 +1031,7 @@ def process_node(self, node: ir.Node) -> Replacement | None: input_tensors = [x.const_value if x is not None else None for x in node.inputs] if any( - tensor.nbytes > self.input_size_limit + tensor.size > self.input_size_limit for tensor in input_tensors if tensor is not None ): @@ -1048,7 +1048,7 @@ def process_node(self, node: ir.Node) -> Replacement | None: # Skip folding large tensors if logger.isEnabledFor(logging.DEBUG): input_sizes = [ - tensor.nbytes for tensor in input_tensors if tensor is not None + tensor.size for tensor in input_tensors if tensor is not None ] logger.debug( "Skipping constant folding for node %s due to large input size: %s", From 9550ad551189a8ce34ecbfcad6dc6ed197d71223 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Thu, 19 Jun 2025 04:30:57 +0000 Subject: [PATCH 2/3] update threshold --- onnxscript/optimizer/_constant_folding.py | 9 ++++----- onnxscript/optimizer/_constant_folding_test.py | 7 +++---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/onnxscript/optimizer/_constant_folding.py b/onnxscript/optimizer/_constant_folding.py index 6da9063620..4378b6c3f6 100644 --- a/onnxscript/optimizer/_constant_folding.py +++ b/onnxscript/optimizer/_constant_folding.py @@ -19,9 +19,9 @@ import onnxscript.utils.utils as utils from onnxscript.ir import _tape -DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT = 1024 +DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT = 512 
-DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT = 1024 * 1024 +DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT = 512 * 512 _NON_DETERMINISTIC_OPS = frozenset( @@ -1029,7 +1029,6 @@ def process_node(self, node: ir.Node) -> Replacement | None: return None input_tensors = [x.const_value if x is not None else None for x in node.inputs] - if any( tensor.size > self.input_size_limit for tensor in input_tensors @@ -1190,10 +1189,10 @@ def fold_constants( model: The ONNX model to optimize. onnx_shape_inference: Whether to enable ONNX shape inference during constant folding. Defaults to False. - input_size_limit: The maximum size (in bytes) of input tensors + input_size_limit: The maximum size (in number of elements) of input tensors that can be considered for constant folding. Defaults to `DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT`. - output_size_limit: The maximum size (in bytes) of output tensors + output_size_limit: The maximum size (in number of elements) of output tensors that can be stored after constant folding. Defaults to `DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT`. 
always_fold_ops: A collection of op types that should always be folded, diff --git a/onnxscript/optimizer/_constant_folding_test.py b/onnxscript/optimizer/_constant_folding_test.py index 20f116c7d9..7bc94ce6bd 100644 --- a/onnxscript/optimizer/_constant_folding_test.py +++ b/onnxscript/optimizer/_constant_folding_test.py @@ -15,6 +15,7 @@ class FoldConstantsTest(unittest.TestCase): def _fold(self, model: ir.Model | str, onnx_shape_inference=False, **kwargs): + print("Folding constants with kwargs:", kwargs) if isinstance(model, str): model = ir.from_onnx_text(model) _constant_folding.fold_constants( @@ -552,15 +553,13 @@ def test_input_size_limit(self): w.const_value = ir.tensor(np.random.random((256, 256)).astype(np.float32)) # Input size limit will prevent folding of Mul op - optimized = self._fold(model, input_size_limit=3 * 256 * 256) + optimized = self._fold(model, onnx_shape_inference=False, input_size_limit=128 * 128) ops = [node.op_type for node in optimized.graph] self.assertEqual(ops, ["Mul", "Add"]) # Input size limit will allow folding of Mul op # Since there is no increase in model-size, output-size is not a concern. 
- optimized = self._fold( - model, input_size_limit=4 * 256 * 256, output_size_limit=4 * 256 * 256 - ) + optimized = self._fold(model, input_size_limit=256 * 256, output_size_limit=256 * 256) ops = [node.op_type for node in optimized.graph] self.assertEqual(ops, ["Constant", "Add"]) From 9890732c862e4ca26d6f02bbc5ef52f76357da09 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Thu, 19 Jun 2025 16:23:39 +0000 Subject: [PATCH 3/3] address ci and remove printout --- onnxscript/ir/passes/common/_c_api_utils.py | 2 +- onnxscript/optimizer/_constant_folding_test.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/onnxscript/ir/passes/common/_c_api_utils.py b/onnxscript/ir/passes/common/_c_api_utils.py index c2d11a4d2a..7f9ac687f4 100644 --- a/onnxscript/ir/passes/common/_c_api_utils.py +++ b/onnxscript/ir/passes/common/_c_api_utils.py @@ -51,7 +51,7 @@ def call_onnx_api(func: Callable[[onnx.ModelProto], _R], model: ir.Model) -> _R: initializer.dtype = initializer.const_value.dtype if initializer not in model.graph.inputs: model.graph.inputs.append(initializer) - if initializer.size > _BIG_TENSOR_SIZE_LIMIT: + if initializer.const_value.size > _BIG_TENSOR_SIZE_LIMIT: # Temporarily remove the initializer value to reduce model size # for onnx.shape_inference initializer.const_value = None diff --git a/onnxscript/optimizer/_constant_folding_test.py b/onnxscript/optimizer/_constant_folding_test.py index 7bc94ce6bd..e58ee0ba19 100644 --- a/onnxscript/optimizer/_constant_folding_test.py +++ b/onnxscript/optimizer/_constant_folding_test.py @@ -15,7 +15,6 @@ class FoldConstantsTest(unittest.TestCase): def _fold(self, model: ir.Model | str, onnx_shape_inference=False, **kwargs): - print("Folding constants with kwargs:", kwargs) if isinstance(model, str): model = ir.from_onnx_text(model) _constant_folding.fold_constants(