huggingface · IlyasMoutawwakil · May 16, 2025 · May 12, 2025 · May 12, 2025 · May 12, 2025
diff --git a/optimum/commands/export/onnx.py b/optimum/commands/export/onnx.py
@@ -169,6 +169,11 @@ def parse_args_onnx(parser):
         action="store_true",
         help="PyTorch-only argument. Disables PyTorch ONNX export constant folding.",
     )
+    optional_group.add_argument(
+        "--slim",
+        action="store_true",
+        help="Enables onnxslim optimization.",
+    )
 
     input_group = parser.add_argument_group(
         "Input shapes (if necessary, this allows to override the shapes of the input given to the ONNX exporter, that requires an example input)."
@@ -286,5 +291,6 @@ def run(self):
             no_dynamic_axes=self.args.no_dynamic_axes,
             model_kwargs=self.args.model_kwargs,
             do_constant_folding=not self.args.no_constant_folding,
+            slim=self.args.slim,
             **input_shapes,
         )
diff --git a/optimum/exporters/onnx/__main__.py b/optimum/exporters/onnx/__main__.py
@@ -78,6 +78,7 @@ def main_export(
     legacy: bool = False,
     no_dynamic_axes: bool = False,
     do_constant_folding: bool = True,
+    slim: bool = False,
     **kwargs_shapes,
 ):
     """
@@ -166,6 +167,8 @@ def main_export(
             If True, disables the use of dynamic axes during ONNX export.
         do_constant_folding (bool, defaults to `True`):
             PyTorch-specific argument. If `True`, the PyTorch ONNX export will fold constants into adjacent nodes, if possible.
+        slim (bool, defaults to `False`):
+            PyTorch-specific argument. If `True`, use onnxslim to optimize the ONNX model.
         **kwargs_shapes (`Dict`):
             Shapes to use during inference. This argument allows to override the default shapes used during the ONNX export.
 
@@ -391,6 +394,7 @@ def main_export(
         task=task,
         use_subprocess=use_subprocess,
         do_constant_folding=do_constant_folding,
+        slim=slim,
         **kwargs_shapes,
     )
 

diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py
@@ -30,12 +30,14 @@
 from transformers.modeling_utils import get_parameter_dtype
 from transformers.utils import is_tf_available, is_torch_available
 
+from ...onnx.graph_transformations import check_and_save_model
 from ...onnx.utils import _get_onnx_external_constants, _get_onnx_external_data_tensors, check_model_uses_external_data
 from ...utils import (
     DEFAULT_DUMMY_SHAPES,
     ONNX_WEIGHTS_NAME,
     TORCH_MINIMUM_VERSION,
     is_diffusers_available,
+    is_onnxslim_available,
     is_torch_onnx_support_available,
     is_transformers_version,
     logging,
@@ -917,6 +919,7 @@ def onnx_export_from_model(
     task: Optional[str] = None,
     use_subprocess: bool = False,
     do_constant_folding: bool = True,
+    slim: bool = False,
     **kwargs_shapes,
 ):
     """
@@ -972,6 +975,8 @@ def onnx_export_from_model(
             If True, disables the use of dynamic axes during ONNX export.
         do_constant_folding (bool, defaults to `True`):
             PyTorch-specific argument. If `True`, the PyTorch ONNX export will fold constants into adjacent nodes, if possible.
+        slim (bool, defaults to `False`):
+            Use onnxslim to optimize the ONNX model.
         **kwargs_shapes (`Dict`):
             Shapes to use during inference. This argument allows to override the default shapes used during the ONNX export.
 
@@ -1196,6 +1201,20 @@ def onnx_export_from_model(
         optimization_config.disable_shape_inference = True
         optimizer.optimize(save_dir=output, optimization_config=optimization_config, file_suffix="")
 
+    if slim:
+        if not is_onnxslim_available():
+            raise ImportError("The pip package `onnxslim` is required to optimize onnx models.")
+
+        import glob
+
+        from onnxslim import slim
+
+        onnx_models = glob.glob(os.path.join(output, "**", "*.onnx"), recursive=True)
+
+        for model in onnx_models:
+            slimmed_model = slim(model)
+            check_and_save_model(slimmed_model, model)
+
     # Optionally post process the obtained ONNX file(s), for example to merge the decoder / decoder with past if any
     # TODO: treating diffusion separately is quite ugly
     if not no_post_process and library_name != "diffusers":

diff --git a/optimum/utils/__init__.py b/optimum/utils/__init__.py
@@ -41,6 +41,7 @@
     is_gptqmodel_available,
     is_onnx_available,
     is_onnxruntime_available,
+    is_onnxslim_available,
     is_pydantic_available,
     is_sentence_transformers_available,
     is_tf_available,

diff --git a/optimum/utils/import_utils.py b/optimum/utils/import_utils.py
@@ -134,6 +134,7 @@ def _is_package_available(
         "intel-tensorflow-avx512",
     ],
 )
+_onnxslim_available = _is_package_available("onnxslim")
 
 if _tf_available and version.parse(_tf_version) < version.parse("2"):
     logger.warning(
@@ -267,6 +268,10 @@ def is_gptqmodel_available():
             )
 
 
+def is_onnxslim_available():  # flag is computed once, at module import, by _is_package_available("onnxslim")
+    return _onnxslim_available
+
+
 @contextmanager
 def check_if_pytorch_greater(target_version: str, message: str):
     r"""

diff --git a/setup.py b/setup.py
@@ -38,6 +38,7 @@
     "sentencepiece",
     "rjieba",
     "hf_xet",
+    "onnxslim>=0.1.53",
 ]
 
 QUALITY_REQUIRE = ["black~=23.1", "ruff==0.1.5"]

diff --git a/tests/exporters/onnx/test_export_cli.py b/tests/exporters/onnx/test_export_cli.py
@@ -40,6 +40,7 @@
     NO_DYNAMIC_AXES_EXPORT_SHAPES_TRANSFORMERS,
     PYTORCH_DIFFUSION_MODEL,
     PYTORCH_EXPORT_MODELS_TINY,
+    PYTORCH_EXPORT_MODELS_TINY_SLIM,
     PYTORCH_SENTENCE_TRANSFORMERS_MODEL,
     PYTORCH_TIMM_MODEL,
     PYTORCH_TIMM_MODEL_NO_DYNAMIC_AXES,
@@ -730,3 +731,37 @@ def test_complex_synonyms(self):
             model.save_pretrained(tmpdir_in)
 
             main_export(model_name_or_path=tmpdir_in, output=tmpdir_out, task="text-classification")
+
+    @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS_TINY_SLIM, library_name="transformers"))
+    def test_exporters_cli_pytorch_with_slim(
+        self,
+        test_name: str,
+        model_type: str,
+        model_name: str,
+        task: str,
+        variant: str,
+        monolith: bool,
+        no_post_process: bool,
+    ):
+        """Export with `slim=True`, skipping model types the export reports as unsupported."""
+        # speecht5 is exported with an explicit vocoder checkpoint; other model types pass no extra kwargs.
+        model_kwargs = {"vocoder": "fxmarty/speecht5-hifigan-tiny"} if model_type == "speecht5" else None
+
+        try:
+            self._onnx_export(
+                model_name,
+                task,
+                monolith,
+                no_post_process,
+                slim=True,
+                device="cpu",
+                variant=variant,
+                model_kwargs=model_kwargs,
+            )
+        except NotImplementedError as e:
+            message = str(e)
+            unsupported = ("Tried to use onnxslim for the model type", "doesn't support the graph optimization")
+            if not any(marker in message for marker in unsupported):
+                # Any other NotImplementedError is a real failure: re-raise the active exception as-is.
+                raise
+            self.skipTest(f"unsupported model type in onnxslim: {model_type}")
diff --git a/tests/exporters/utils.py b/tests/exporters/utils.py
@@ -365,3 +365,7 @@
         "timm/ese_vovnet19b_dw.ra_in1k": ["image-classification"],
     }
 }
+
+# Model types from PYTORCH_EXPORT_MODELS_TINY that the `slim=True` export test runs on.
+_SLIM_MODEL_TYPES = ("modernbert", "llama", "t5", "whisper")
+PYTORCH_EXPORT_MODELS_TINY_SLIM = {k: v for k, v in PYTORCH_EXPORT_MODELS_TINY.items() if k in _SLIM_MODEL_TYPES}