[optimizer] Replace value.nbytes with value.size #2399

Merged
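This PR switches the constant-folding size limits from counting bytes to counting tensor elements. For context: on a NumPy array, `.size` is the element count while `.nbytes` is `size * itemsize`, and the onnxscript IR tensors touched here expose the same two attributes. A minimal sketch of the difference, using the 256x256 float32 weight from the updated test below:

```python
import numpy as np

# A 256x256 float32 tensor, matching the weight in the test change below.
value = np.random.random((256, 256)).astype(np.float32)

print(value.size)    # 65536  -- element count, independent of dtype
print(value.nbytes)  # 262144 -- value.size * value.itemsize (4 bytes per float32)
```

After this change the limits are dtype-independent: a tensor of 512 float16 elements and one of 512 float64 elements hit the thresholds identically, even though their byte sizes differ by 4x.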
21 changes: 10 additions & 11 deletions onnxscript/optimizer/_constant_folding.py
@@ -19,9 +19,9 @@
 import onnxscript.utils.utils as utils
 from onnxscript.ir import _tape
 
-DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT = 1024
+DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT = 512
 
-DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT = 1024 * 1024
+DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT = 512 * 512
 
 
 _NON_DETERMINISTIC_OPS = frozenset(
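Read against the old byte-based defaults, the new element-based defaults shift the effective thresholds per dtype. A rough comparison for float32 (arithmetic only; the names below are labels for illustration, not identifiers from the source):

```python
# Old: limits compared against tensor.nbytes (bytes).
# New: limits compared against tensor.size (elements).

OLD_INPUT_LIMIT = 1024          # bytes    -> 256 float32 elements
NEW_INPUT_LIMIT = 512           # elements -> 2048 bytes as float32

OLD_OUTPUT_LIMIT = 1024 * 1024  # bytes    -> 262144 float32 elements
NEW_OUTPUT_LIMIT = 512 * 512    # elements -> 262144 elements = 1 MiB as float32
```

So for float32 the output default is byte-equivalent to the old one, while the input default doubles from 256 to 512 elements; for other dtypes the byte-equivalent thresholds now scale with item size.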
@@ -944,21 +944,21 @@ def new_constant(self, node: ir.Node, value) -> ir.Node | None:
         tensor.name = irvalue.name
         irvalue.const_value = tensor
 
-        if value.nbytes > self.output_size_limit:
+        if value.size > self.output_size_limit:
             # Handle examples like Transpose(weight) to be folded even if the size is large,
             # as long as weight has no other uses. This won't increase model size.
             removed_input_size = 0
             for input in node.inputs:
                 if (input is not None) and (len(input.uses()) == 1):
                     array = _get_numpy_value(input)
                     if array is not None:
-                        removed_input_size += array.nbytes
-            increased_size = value.nbytes - removed_input_size
+                        removed_input_size += array.size
+            increased_size = value.size - removed_input_size
             if increased_size > 0:
                 logger.info(
                     "Skip storing constant folded nvalue %s due to large size %s.",
                     irvalue.name,
-                    value.nbytes,
+                    value.size,
                 )
                 return None
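The branch above lets an oversized folded constant through when the inputs it makes dead free at least as many elements as the new constant occupies, e.g. `Transpose(weight)` where `weight` has no other uses. A standalone sketch of that accounting, with plain integers standing in for the IR values:

```python
def folding_would_grow_model(output_size: int,
                             single_use_input_sizes: list[int],
                             output_size_limit: int) -> bool:
    """Mirrors the check above, with sizes measured in elements."""
    if output_size <= output_size_limit:
        return False  # small enough to store unconditionally
    removed = sum(single_use_input_sizes)  # elements freed by dead inputs
    return output_size - removed > 0      # increased_size > 0 -> skip folding

# Transpose(weight): output and single-use input have the same element count,
# so folding is allowed even though the output exceeds the limit.
assert not folding_would_grow_model(600 * 600, [600 * 600], 512 * 512)

# A large output with no removable inputs would grow the model -> skip.
assert folding_would_grow_model(600 * 600, [], 512 * 512)
```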

@@ -1029,9 +1029,8 @@ def process_node(self, node: ir.Node) -> Replacement | None:
             return None
 
         input_tensors = [x.const_value if x is not None else None for x in node.inputs]
-
         if any(
-            tensor.nbytes > self.input_size_limit
+            tensor.size > self.input_size_limit
             for tensor in input_tensors
             if tensor is not None
         ):
@@ -1048,7 +1047,7 @@ def process_node(self, node: ir.Node) -> Replacement | None:
             # Skip folding large tensors
             if logger.isEnabledFor(logging.DEBUG):
                 input_sizes = [
-                    tensor.nbytes for tensor in input_tensors if tensor is not None
+                    tensor.size for tensor in input_tensors if tensor is not None
                 ]
                 logger.debug(
                     "Skipping constant folding for node %s due to large input size: %s",
@@ -1190,10 +1189,10 @@ def fold_constants(
         model: The ONNX model to optimize.
         onnx_shape_inference: Whether to enable ONNX shape inference during
             constant folding. Defaults to False.
-        input_size_limit: The maximum size (in bytes) of input tensors
+        input_size_limit: The maximum size of input tensors
             that can be considered for constant folding. Defaults to
             `DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT`.
-        output_size_limit: The maximum size (in bytes) of output tensors
+        output_size_limit: The maximum size of output tensors
             that can be stored after constant folding. Defaults to
             `DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT`.
         always_fold_ops: A collection of op types that should always be folded,
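For callers, the practical consequence is that `input_size_limit` and `output_size_limit` are now element counts rather than byte counts. A hedged usage sketch (import path taken from the file shown in this diff; the limit values are arbitrary examples, and `model` is assumed to be loaded elsewhere):

```python
from onnxscript.optimizer._constant_folding import fold_constants

# Both limits now count tensor elements, regardless of dtype.
fold_constants(
    model,
    input_size_limit=4096,        # fold only if every constant input has <= 4096 elements
    output_size_limit=512 * 512,  # store folded outputs up to 262144 elements
)
```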
6 changes: 2 additions & 4 deletions onnxscript/optimizer/_constant_folding_test.py
@@ -552,15 +552,13 @@ def test_input_size_limit(self):
         w.const_value = ir.tensor(np.random.random((256, 256)).astype(np.float32))
 
         # Input size limit will prevent folding of Mul op
-        optimized = self._fold(model, input_size_limit=3 * 256 * 256)
+        optimized = self._fold(model, onnx_shape_inference=False, input_size_limit=128 * 128)
         ops = [node.op_type for node in optimized.graph]
         self.assertEqual(ops, ["Mul", "Add"])
 
         # Input size limit will allow folding of Mul op
         # Since there is no increase in model-size, output-size is not a concern.
-        optimized = self._fold(
-            model, input_size_limit=4 * 256 * 256, output_size_limit=4 * 256 * 256
-        )
+        optimized = self._fold(model, input_size_limit=256 * 256, output_size_limit=256 * 256)
         ops = [node.op_type for node in optimized.graph]
         self.assertEqual(ops, ["Constant", "Add"])

2 changes: 1 addition & 1 deletion onnxscript/version_converter/_c_api_utils.py
@@ -51,7 +51,7 @@ def call_onnx_api(func: Callable[[onnx.ModelProto], _R], model: ir.Model) -> _R:
             initializer.dtype = initializer.const_value.dtype
         if initializer not in model.graph.inputs:
             model.graph.inputs.append(initializer)
-        if initializer.const_value.nbytes > _BIG_TENSOR_SIZE_LIMIT:
+        if initializer.const_value.size > _BIG_TENSOR_SIZE_LIMIT:
             # Temporarily remove the initializer value to reduce model size
             # for onnx.shape_inference
             initializer.const_value = None
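The same reinterpretation applies to `_BIG_TENSOR_SIZE_LIMIT`: whether an initializer is detached before `onnx.shape_inference` now depends on its element count, not its byte size. A hypothetical illustration (the limit value below is made up for the example, not taken from the source):

```python
import numpy as np

_BIG_TENSOR_SIZE_LIMIT = 1000  # assumed value, for illustration only

fp16 = np.zeros(1500, dtype=np.float16)  # 1500 elements,  3000 bytes
fp64 = np.zeros(1500, dtype=np.float64)  # 1500 elements, 12000 bytes

# Under .size both tensors are treated identically; under .nbytes the
# float64 tensor crossed a byte threshold 4x sooner than the float16 one.
print(fp16.size > _BIG_TENSOR_SIZE_LIMIT)  # True
print(fp64.size > _BIG_TENSOR_SIZE_LIMIT)  # True
```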