huggingface · IlyasMoutawwakil · Jul 23, 2025 · Jul 10, 2025 · Jul 10, 2025 · Jul 10, 2025
diff --git a/.github/workflows/test_exporters_onnx.yml b/.github/workflows/test_exporters_onnx.yml
@@ -40,4 +40,4 @@ jobs:
 
       - name: Test with pytest
         run: |
-          pytest tests/exporters/onnx/test_export.py -vvvv --durations=0 -n auto
+          pytest tests/exporters/onnx/test_export.py -vvvv --durations=0
diff --git a/.github/workflows/test_exporters_tflite.yml b/.github/workflows/test_exporters_tflite.yml
@@ -6,7 +6,6 @@ on:
     branches: [main]
   pull_request:
     branches: [main]
-    types: [opened, synchronize, reopened, labeled, unlabeled]
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

diff --git a/.github/workflows/test_exporters_tflite_cli.yml b/.github/workflows/test_exporters_tflite_cli.yml
@@ -6,7 +6,6 @@ on:
     branches: [main]
   pull_request:
     branches: [main]
-    types: [opened, synchronize, reopened, labeled, unlabeled]
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

diff --git a/.github/workflows/test_onnxruntime_slow.yml b/.github/workflows/test_onnxruntime_slow.yml
@@ -87,7 +87,7 @@ jobs:
 
       - name: Test with pytest (in parallel)
         run: |
-          pytest tests/onnxruntime -m "not run_in_series" --durations=0 -vvvv -n auto
+          pytest tests/onnxruntime -m "not run_in_series" --durations=0 -vvvv
         env:
           HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
           RUN_SLOW: 1
diff --git a/optimum/commands/__init__.py b/optimum/commands/__init__.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+__path__ = __import__("pkgutil").extend_path(__path__, __name__)
 
 from .base import BaseOptimumCLICommand, CommandInfo, RootOptimumCLICommand
 from .env import EnvironmentCommand

diff --git a/optimum/commands/export/__init__.py b/optimum/commands/export/__init__.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+__path__ = __import__("pkgutil").extend_path(__path__, __name__)
 
 from .base import ExportCommand
 from .onnx import ONNXExportCommand

diff --git a/optimum/exporters/__init__.py b/optimum/exporters/__init__.py
@@ -12,5 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from . import onnx  # noqa
+__path__ = __import__("pkgutil").extend_path(__path__, __name__)
+
 from .tasks import TasksManager  # noqa
+from .base import ExporterConfig  # noqa
diff --git a/optimum/exporters/base.py b/optimum/exporters/base.py
@@ -69,7 +69,12 @@
 
 
 class ExportConfig(ABC):
-    pass
+    def __init_subclass__(cls, **kwargs):
+        super().__init_subclass__(**kwargs)
+        logger.warning(
+            "The `ExportConfig` class is deprecated and will be removed in a future version. "
+            "Please use `ExporterConfig` instead."
+        )
 
 
 class ExporterConfig(ABC):

diff --git a/optimum/exporters/onnx/__main__.py b/optimum/exporters/onnx/__main__.py
@@ -340,6 +340,10 @@ def main_export(
         if model_type in SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED and is_transformers_version("<", "4.42"):
             loading_kwargs["attn_implementation"] = "eager"
 
+        # Only the eager attention implementation returns attention weights, so force it when they are requested
+        if model_kwargs is not None and model_kwargs.get("output_attentions", False):
+            loading_kwargs["attn_implementation"] = "eager"
+
     with DisableCompileContextManager():
         model = TasksManager.get_model_from_task(
             task,

diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py
@@ -642,6 +642,11 @@ def export_tensorflow(
         `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named outputs from
         the ONNX configuration.
     """
+
+    logger.warning(
+        "The TensorFlow ONNX export is deprecated and will be removed in the next major release of Optimum."
+    )
+
     # This is needed to import onnx and tf2onnx because onnx is also the name of the current directory.
     import sys
 

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
@@ -96,13 +96,13 @@
     MgpstrModelPatcher,
     MistralModelPatcher,
     MusicgenModelPatcher,
+    Qwen3MoeModelPatcher,
     SAMModelPatcher,
     SentenceTransformersCLIPPatcher,
     SentenceTransformersTransformerPatcher,
     SpeechT5ModelPatcher,
     VisionEncoderDecoderPatcher,
     VitPoseModelPatcher,
-    WavLMModelPatcher,
 )
 
 
@@ -433,6 +433,11 @@ class LlamaOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
 
 
+@register_tasks_manager_onnx("smollm3", *COMMON_TEXT_GENERATION_TASKS + ["text-classification"])
+class SmolLM3OnnxConfig(LlamaOnnxConfig):
+    MIN_TRANSFORMERS_VERSION = version.parse("4.53.0")
+
+
 @register_tasks_manager_onnx("olmo", *COMMON_TEXT_GENERATION_TASKS)
 class OlmoOnnxConfig(LlamaOnnxConfig):
     ATOL_FOR_VALIDATION = 1e-4
@@ -459,6 +464,7 @@ class Qwen3OnnxConfig(LlamaOnnxConfig):
 )
 class Qwen3MoeOnnxConfig(LlamaOnnxConfig):
     MIN_TRANSFORMERS_VERSION = version.parse("4.51.0")
+    _MODEL_PATCHER = Qwen3MoeModelPatcher
 
 
 @register_tasks_manager_onnx("gemma", *COMMON_TEXT_GENERATION_TASKS + ["text-classification"])
@@ -476,19 +482,17 @@ class GraniteOnnxConfig(LlamaOnnxConfig):
 
 @register_tasks_manager_onnx("phi", *COMMON_TEXT_GENERATION_TASKS + ["text-classification"])
 class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
-    DEFAULT_ONNX_OPSET = 14  # Phi now uses F.scaled_dot_product_attention by default for torch>=2.1.1.
+    DEFAULT_ONNX_OPSET = 14  # Phi now uses F.scaled_dot_product_attention
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
-    MIN_TRANSFORMERS_VERSION = version.parse("4.42.0")
+    MIN_TRANSFORMERS_VERSION = version.parse("4.36.0")
 
 
 @register_tasks_manager_onnx("phi3", *COMMON_TEXT_GENERATION_TASKS + ["text-classification"])
 class Phi3OnnxConfig(PhiOnnxConfig):
-    DUMMY_INPUT_GENERATOR_CLASSES = (
-        MistralDummyPastKeyValuesGenerator,
-    ) + TextDecoderOnnxConfig.DUMMY_INPUT_GENERATOR_CLASSES
+    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, MistralDummyPastKeyValuesGenerator)
     DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfigWithGQA
-    MIN_TRANSFORMERS_VERSION = version.parse("4.50.0")
+    MIN_TRANSFORMERS_VERSION = version.parse("4.41.0")
 
 
 @register_tasks_manager_onnx("internlm2", *["text-generation", "text-generation-with-past"])
@@ -499,7 +503,7 @@ class InternLM2OnnxConfig(LlamaOnnxConfig):
 @register_tasks_manager_onnx("mistral", *COMMON_TEXT_GENERATION_TASKS + ["text-classification"])
 class MistralOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
     # This is because of the patching of torch.triu in AttentionMaskConverter, that exists from transformers>=4.35
-    MIN_TRANSFORMERS_VERSION = version.parse("4.34.99")
+    MIN_TRANSFORMERS_VERSION = version.parse("4.35.0")
 
     # The ONNX export of this architecture needs the Trilu operator support, available since opset 14
     DEFAULT_ONNX_OPSET = 14
@@ -511,12 +515,10 @@ class MistralOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
     _MODEL_PATCHER = MistralModelPatcher
 
 
-@register_tasks_manager_onnx("mpt", *["text-generation", "text-generation-with-past", "text-classification"])
+@register_tasks_manager_onnx("mpt", *COMMON_TEXT_GENERATION_TASKS + ["text-classification", "token-classification"])
 class MPTOnnxConfig(TextDecoderOnnxConfig):
     # MPT does not require position_ids input.
-    DEFAULT_ONNX_OPSET = 13
-    # TODO: fix inference for transformers < v4.41 for beam_search > 1
-    MIN_TRANSFORMERS_VERSION = version.parse("4.41.0")
+    MIN_TRANSFORMERS_VERSION = version.parse("4.36.0")
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(
         num_attention_heads="n_heads", hidden_size="d_model", num_layers="n_layers"
     )
@@ -525,15 +527,30 @@ class MPTOnnxConfig(TextDecoderOnnxConfig):
 @register_tasks_manager_onnx("bloom", *COMMON_TEXT_GENERATION_TASKS + ["text-classification", "token-classification"])
 class BloomOnnxConfig(TextDecoderOnnxConfig):
     # Bloom does not require position_ids input.
-    DUMMY_INPUT_GENERATOR_CLASSES = (
-        BloomDummyPastKeyValuesGenerator,
-    ) + TextDecoderOnnxConfig.DUMMY_INPUT_GENERATOR_CLASSES
-
     DEFAULT_ONNX_OPSET = 14  # Bloom uses F.scaled_dot_product_attention
-    MIN_TRANSFORMERS_VERSION = version.parse("4.44.0")
+    MIN_TRANSFORMERS_VERSION = version.parse("4.36.0")
     DUMMY_PKV_GENERATOR_CLASS = BloomDummyPastKeyValuesGenerator
+    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, BloomDummyPastKeyValuesGenerator)
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(num_layers="n_layer", num_attention_heads="n_head")
 
+    def add_past_key_values(self, inputs_or_outputs: Dict[str, Dict[int, str]], direction: str):
+        if is_transformers_version(">=", "4.44"):
+            super().add_past_key_values(inputs_or_outputs, direction)
+        else:
+            if direction not in ["inputs", "outputs"]:
+                raise ValueError(f'direction must either be "inputs" or "outputs", but {direction} was given')
+
+            if direction == "inputs":
+                decoder_sequence_name = "past_sequence_length"
+                name = "past_key_values"
+            else:
+                decoder_sequence_name = "past_sequence_length + 1"
+                name = "present"
+
+            for i in range(self._normalized_config.num_layers):
+                inputs_or_outputs[f"{name}.{i}.key"] = {0: "batch_size * num_heads", 2: decoder_sequence_name}
+                inputs_or_outputs[f"{name}.{i}.value"] = {0: "batch_size * num_heads", 1: decoder_sequence_name}
+
 
 @register_tasks_manager_onnx(
     "gpt_bigcode", *COMMON_TEXT_GENERATION_TASKS + ["text-classification", "token-classification"]
@@ -1838,11 +1855,7 @@ class UniSpeechSATOnnxConfig(HubertOnnxConfig):
     ],
 )
 class WavLMOnnxConfig(HubertOnnxConfig):
-    DEFAULT_ONNX_OPSET = 12
-    # we need to set output_attentions=True in the model input to avoid calling
-    # torch.nn.functional.scaled_dot_product_attention that is not supported by the ONNX export
-    # due to the op torch.nn.functional.multi_head_attention_forward used for WavLM
-    _MODEL_PATCHER = WavLMModelPatcher
+    DEFAULT_ONNX_OPSET = 14  # WavLM now uses F.scaled_dot_product_attention by default for torch>=2.1.1.
 
 
 @register_tasks_manager_onnx("audio-spectrogram-transformer", *["feature-extraction", "audio-classification"])