feat: add onnxslim (#2258)

inisis · IlyasMoutawwakil · web-flow · commit d1af494869c6 · 2025-05-16T19:10:27.000+02:00
* feat: add onnxslim

* fix style and rename simplify to slim

* add onnxslim tests

* fix format

* add slim args for main_export and make slim true in test_export_cli for tests

* fix format

* add is_onnxslim_available func and add onnxslim to test dependency

* refactor format and pin onnxslim to 0.1.53

* Update optimum/exporters/onnx/convert.py

Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>

* Update tests/exporters/onnx/test_export_cli.py

Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>

* use glob and refactor tests

* Update optimum/exporters/onnx/convert.py

* Update tests/exporters/onnx/test_export_cli.py

* remove glob

* fix tests error

* add slim to _onnx_export

* Update optimum/exporters/onnx/convert.py

---------

Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
diff --git a/optimum/commands/export/onnx.py b/optimum/commands/export/onnx.py
@@ -169,6 +169,11 @@ def parse_args_onnx(parser):
         action="store_true",
         help="PyTorch-only argument. Disables PyTorch ONNX export constant folding.",
     )
+    optional_group.add_argument(
+        "--slim",
+        action="store_true",
+        help="Enables onnxslim optimization.",
+    )
 
     input_group = parser.add_argument_group(
         "Input shapes (if necessary, this allows to override the shapes of the input given to the ONNX exporter, that requires an example input)."
@@ -286,5 +291,6 @@ def run(self):
             no_dynamic_axes=self.args.no_dynamic_axes,
             model_kwargs=self.args.model_kwargs,
             do_constant_folding=not self.args.no_constant_folding,
+            slim=self.args.slim,
             **input_shapes,
         )
diff --git a/optimum/exporters/onnx/__main__.py b/optimum/exporters/onnx/__main__.py
@@ -78,6 +78,7 @@ def main_export(
     legacy: bool = False,
     no_dynamic_axes: bool = False,
     do_constant_folding: bool = True,
+    slim: bool = False,
     **kwargs_shapes,
 ):
     """
@@ -166,6 +167,8 @@ def main_export(
             If True, disables the use of dynamic axes during ONNX export.
         do_constant_folding (bool, defaults to `True`):
             PyTorch-specific argument. If `True`, the PyTorch ONNX export will fold constants into adjacent nodes, if possible.
+        slim (bool, defaults to `False`):
+            PyTorch-specific argument. If `True`, use onnxslim to optimize the ONNX model.
         **kwargs_shapes (`Dict`):
             Shapes to use during inference. This argument allows to override the default shapes used during the ONNX export.
 
@@ -391,6 +394,7 @@ def main_export(
         task=task,
         use_subprocess=use_subprocess,
         do_constant_folding=do_constant_folding,
+        slim=slim,
         **kwargs_shapes,
     )
 
diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py
@@ -30,12 +30,14 @@
 from transformers.modeling_utils import get_parameter_dtype
 from transformers.utils import is_tf_available, is_torch_available
 
+from ...onnx.graph_transformations import check_and_save_model
 from ...onnx.utils import _get_onnx_external_constants, _get_onnx_external_data_tensors, check_model_uses_external_data
 from ...utils import (
     DEFAULT_DUMMY_SHAPES,
     ONNX_WEIGHTS_NAME,
     TORCH_MINIMUM_VERSION,
     is_diffusers_available,
+    is_onnxslim_available,
     is_torch_onnx_support_available,
     is_transformers_version,
     logging,
@@ -917,6 +919,7 @@ def onnx_export_from_model(
     task: Optional[str] = None,
     use_subprocess: bool = False,
     do_constant_folding: bool = True,
+    slim: bool = False,
     **kwargs_shapes,
 ):
     """
@@ -972,6 +975,8 @@ def onnx_export_from_model(
             If True, disables the use of dynamic axes during ONNX export.
         do_constant_folding (bool, defaults to `True`):
             PyTorch-specific argument. If `True`, the PyTorch ONNX export will fold constants into adjacent nodes, if possible.
+        slim (bool, defaults to `False`):
+            Use onnxslim to optimize the ONNX model.
         **kwargs_shapes (`Dict`):
             Shapes to use during inference. This argument allows to override the default shapes used during the ONNX export.
 
@@ -1196,6 +1201,17 @@ def onnx_export_from_model(
         optimization_config.disable_shape_inference = True
         optimizer.optimize(save_dir=output, optimization_config=optimization_config, file_suffix="")
 
+    if slim:
+        if not is_onnxslim_available():
+            raise ImportError("The pip package `onnxslim` is required to optimize onnx models.")
+
+        from onnxslim import slim
+
+        for subpath in onnx_files_subpaths:
+            file_path = os.path.join(output, subpath)
+            slimmed_model = slim(file_path)
+            check_and_save_model(slimmed_model, file_path)
+
     # Optionally post process the obtained ONNX file(s), for example to merge the decoder / decoder with past if any
     # TODO: treating diffusion separately is quite ugly
     if not no_post_process and library_name != "diffusers":
diff --git a/optimum/utils/__init__.py b/optimum/utils/__init__.py
@@ -41,6 +41,7 @@
     is_gptqmodel_available,
     is_onnx_available,
     is_onnxruntime_available,
+    is_onnxslim_available,
     is_pydantic_available,
     is_sentence_transformers_available,
     is_tf_available,
diff --git a/optimum/utils/import_utils.py b/optimum/utils/import_utils.py
@@ -134,6 +134,7 @@ def _is_package_available(
         "intel-tensorflow-avx512",
     ],
 )
+_onnxslim_available = _is_package_available("onnxslim")
 
 if _tf_available and version.parse(_tf_version) < version.parse("2"):
     logger.warning(
@@ -267,6 +268,10 @@ def is_gptqmodel_available():
             )
 
 
+def is_onnxslim_available():
+    return _onnxslim_available
+
+
 @contextmanager
 def check_if_pytorch_greater(target_version: str, message: str):
     r"""
diff --git a/setup.py b/setup.py
@@ -38,6 +38,7 @@
     "sentencepiece",
     "rjieba",
     "hf_xet",
+    "onnxslim>=0.1.53",
 ]
 
 QUALITY_REQUIRE = ["black~=23.1", "ruff==0.1.5"]
diff --git a/tests/exporters/onnx/test_export_cli.py b/tests/exporters/onnx/test_export_cli.py
@@ -40,6 +40,7 @@
     NO_DYNAMIC_AXES_EXPORT_SHAPES_TRANSFORMERS,
     PYTORCH_DIFFUSION_MODEL,
     PYTORCH_EXPORT_MODELS_TINY,
+    PYTORCH_EXPORT_MODELS_TINY_SLIM,
     PYTORCH_SENTENCE_TRANSFORMERS_MODEL,
     PYTORCH_TIMM_MODEL,
     PYTORCH_TIMM_MODEL_NO_DYNAMIC_AXES,
@@ -181,6 +182,7 @@ def _onnx_export(
         variant: str = "default",
         no_dynamic_axes: bool = False,
         model_kwargs: Optional[Dict] = None,
+        slim: bool = False,
     ):
         # We need to set this to some value to be able to test the outputs values for batch size > 1.
         if task == "text-classification":
@@ -203,6 +205,7 @@ def _onnx_export(
                     no_dynamic_axes=no_dynamic_axes,
                     pad_token_id=pad_token_id,
                     model_kwargs=model_kwargs,
+                    slim=slim,
                 )
             except MinimumVersionError as e:
                 pytest.skip(f"Skipping due to minimum version requirements not met. Full error: {e}")
@@ -730,3 +733,24 @@ def test_complex_synonyms(self):
             model.save_pretrained(tmpdir_in)
 
             main_export(model_name_or_path=tmpdir_in, output=tmpdir_out, task="text-classification")
+
+    @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS_TINY_SLIM, library_name="transformers"))
+    def test_exporters_cli_pytorch_with_slim(
+        self,
+        test_name: str,
+        model_type: str,
+        model_name: str,
+        task: str,
+        variant: str,
+        monolith: bool,
+        no_post_process: bool,
+    ):
+        self._onnx_export(
+            model_name,
+            task,
+            monolith,
+            no_post_process,
+            slim=True,
+            device="cpu",
+            variant=variant,
+        )
diff --git a/tests/exporters/utils.py b/tests/exporters/utils.py
@@ -365,3 +365,7 @@
         "timm/ese_vovnet19b_dw.ra_in1k": ["image-classification"],
     }
 }
+
+PYTORCH_EXPORT_MODELS_TINY_SLIM = {
+    k: v for k, v in PYTORCH_EXPORT_MODELS_TINY.items() if k in ["modernbert", "llama", "t5", "whisper"]
+}

Original file line number	Diff line number	Diff line change
`@@ -38,6 +38,7 @@`
`38`	`38`	`"sentencepiece",`
`39`	`39`	`"rjieba",`
`40`	`40`	`"hf_xet",`
	`41`	`+ "onnxslim>=0.1.53",`
`41`	`42`	`]`
`42`	`43`
`43`	`44`	`QUALITY_REQUIRE = ["black~=23.1", "ruff==0.1.5"]`
Original file line number	Diff line number	Diff line change
`@@ -365,3 +365,7 @@`
`365`	`365`	`"timm/ese_vovnet19b_dw.ra_in1k": ["image-classification"],`
`366`	`366`	`}`
`367`	`367`	`}`
	`368`	`+`
	`369`	`+PYTORCH_EXPORT_MODELS_TINY_SLIM = {`
	`370`	`+ k: v for k, v in PYTORCH_EXPORT_MODELS_TINY.items() if k in ["modernbert", "llama", "t5", "whisper"]`
	`371`	`+}`