Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions optimum/commands/export/onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@ def parse_args_onnx(parser):
action="store_true",
help="PyTorch-only argument. Disables PyTorch ONNX export constant folding.",
)
optional_group.add_argument(
"--slim",
action="store_true",
help="Enables onnxslim optimization.",
)

input_group = parser.add_argument_group(
"Input shapes (if necessary, this allows to override the shapes of the input given to the ONNX exporter, that requires an example input)."
Expand Down Expand Up @@ -286,5 +291,6 @@ def run(self):
no_dynamic_axes=self.args.no_dynamic_axes,
model_kwargs=self.args.model_kwargs,
do_constant_folding=not self.args.no_constant_folding,
slim=self.args.slim,
**input_shapes,
)
4 changes: 4 additions & 0 deletions optimum/exporters/onnx/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def main_export(
legacy: bool = False,
no_dynamic_axes: bool = False,
do_constant_folding: bool = True,
slim: bool = False,
**kwargs_shapes,
):
"""
Expand Down Expand Up @@ -166,6 +167,8 @@ def main_export(
If True, disables the use of dynamic axes during ONNX export.
do_constant_folding (bool, defaults to `True`):
PyTorch-specific argument. If `True`, the PyTorch ONNX export will fold constants into adjacent nodes, if possible.
slim (bool, defaults to `False`):
PyTorch-specific argument. If `True`, use onnxslim to optimize the ONNX model.
**kwargs_shapes (`Dict`):
Shapes to use during inference. This argument allows to override the default shapes used during the ONNX export.

Expand Down Expand Up @@ -391,6 +394,7 @@ def main_export(
task=task,
use_subprocess=use_subprocess,
do_constant_folding=do_constant_folding,
slim=slim,
**kwargs_shapes,
)

Expand Down
19 changes: 19 additions & 0 deletions optimum/exporters/onnx/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@
from transformers.modeling_utils import get_parameter_dtype
from transformers.utils import is_tf_available, is_torch_available

from ...onnx.graph_transformations import check_and_save_model
from ...onnx.utils import _get_onnx_external_constants, _get_onnx_external_data_tensors, check_model_uses_external_data
from ...utils import (
DEFAULT_DUMMY_SHAPES,
ONNX_WEIGHTS_NAME,
TORCH_MINIMUM_VERSION,
is_diffusers_available,
is_onnxslim_available,
is_torch_onnx_support_available,
is_transformers_version,
logging,
Expand Down Expand Up @@ -917,6 +919,7 @@ def onnx_export_from_model(
task: Optional[str] = None,
use_subprocess: bool = False,
do_constant_folding: bool = True,
slim: bool = False,
**kwargs_shapes,
):
"""
Expand Down Expand Up @@ -972,6 +975,8 @@ def onnx_export_from_model(
If True, disables the use of dynamic axes during ONNX export.
do_constant_folding (bool, defaults to `True`):
PyTorch-specific argument. If `True`, the PyTorch ONNX export will fold constants into adjacent nodes, if possible.
slim (bool, defaults to `False`):
Use onnxslim to optimize the ONNX model.
**kwargs_shapes (`Dict`):
Shapes to use during inference. This argument allows to override the default shapes used during the ONNX export.

Expand Down Expand Up @@ -1196,6 +1201,20 @@ def onnx_export_from_model(
optimization_config.disable_shape_inference = True
optimizer.optimize(save_dir=output, optimization_config=optimization_config, file_suffix="")

if slim:
if not is_onnxslim_available():
raise ImportError("The pip package `onnxslim` is required to optimize onnx models.")

import glob

from onnxslim import slim

onnx_models = glob.glob(os.path.join(output, "**", "*.onnx"), recursive=True)

for model in onnx_models:
slimmed_model = slim(model)
check_and_save_model(slimmed_model, model)
Comment thread
IlyasMoutawwakil marked this conversation as resolved.
Outdated

# Optionally post process the obtained ONNX file(s), for example to merge the decoder / decoder with past if any
# TODO: treating diffusion separately is quite ugly
if not no_post_process and library_name != "diffusers":
Expand Down
1 change: 1 addition & 0 deletions optimum/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
is_gptqmodel_available,
is_onnx_available,
is_onnxruntime_available,
is_onnxslim_available,
is_pydantic_available,
is_sentence_transformers_available,
is_tf_available,
Expand Down
5 changes: 5 additions & 0 deletions optimum/utils/import_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def _is_package_available(
"intel-tensorflow-avx512",
],
)
_onnxslim_available = _is_package_available("onnxslim")

if _tf_available and version.parse(_tf_version) < version.parse("2"):
logger.warning(
Expand Down Expand Up @@ -267,6 +268,10 @@ def is_gptqmodel_available():
)


def is_onnxslim_available():
    """Return the module-level `_onnxslim_available` flag for the `onnxslim` package."""
    return _onnxslim_available


@contextmanager
def check_if_pytorch_greater(target_version: str, message: str):
r"""
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"sentencepiece",
"rjieba",
"hf_xet",
"onnxslim>=0.1.53",
]

QUALITY_REQUIRE = ["black~=23.1", "ruff==0.1.5"]
Expand Down
35 changes: 35 additions & 0 deletions tests/exporters/onnx/test_export_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
NO_DYNAMIC_AXES_EXPORT_SHAPES_TRANSFORMERS,
PYTORCH_DIFFUSION_MODEL,
PYTORCH_EXPORT_MODELS_TINY,
PYTORCH_EXPORT_MODELS_TINY_SLIM,
PYTORCH_SENTENCE_TRANSFORMERS_MODEL,
PYTORCH_TIMM_MODEL,
PYTORCH_TIMM_MODEL_NO_DYNAMIC_AXES,
Expand Down Expand Up @@ -730,3 +731,37 @@ def test_complex_synonyms(self):
model.save_pretrained(tmpdir_in)

main_export(model_name_or_path=tmpdir_in, output=tmpdir_out, task="text-classification")

@parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS_TINY_SLIM, library_name="transformers"))
def test_exporters_cli_pytorch_with_slim(
    self,
    test_name: str,
    model_type: str,
    model_name: str,
    task: str,
    variant: str,
    monolith: bool,
    no_post_process: bool,
):
    """Run the ONNX export helper on CPU with `slim=True` for each parameterized model.

    A `NotImplementedError` whose message matches one of the known
    "unsupported" markers turns the case into a skip; any other
    `NotImplementedError` is re-raised so the test fails.
    """
    # speecht5 is exported with an explicit vocoder checkpoint; other model types pass no extra kwargs.
    model_kwargs = {"vocoder": "fxmarty/speecht5-hifigan-tiny"} if model_type == "speecht5" else None

    try:
        self._onnx_export(
            model_name,
            task,
            monolith,
            no_post_process,
            slim=True,
            device="cpu",
            variant=variant,
            model_kwargs=model_kwargs,
        )
    except NotImplementedError as e:
        error_text = str(e)
        unsupported_markers = (
            "Tried to use onnxslim for the model type",
            "doesn't support the graph optimization",
        )
        # Guard clause: anything that is not a known "unsupported" message is a real failure.
        if not any(marker in error_text for marker in unsupported_markers):
            raise e
        self.skipTest(f"unsupported model type in onnxslim: {model_type}")
Comment thread
IlyasMoutawwakil marked this conversation as resolved.
Outdated
4 changes: 4 additions & 0 deletions tests/exporters/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,3 +365,7 @@
"timm/ese_vovnet19b_dw.ra_in1k": ["image-classification"],
}
}

# Subset of PYTORCH_EXPORT_MODELS_TINY restricted to the keys listed below.
PYTORCH_EXPORT_MODELS_TINY_SLIM = {
    model_key: entry
    for model_key, entry in PYTORCH_EXPORT_MODELS_TINY.items()
    if model_key in {"modernbert", "llama", "t5", "whisper"}
}