huggingface · echarlaix · May 28, 2025 · May 23, 2025 · May 23, 2025 · May 23, 2025
diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml
@@ -27,9 +27,11 @@ jobs:
       matrix:
         python-version: [3.9]
         runs-on: [ubuntu-22.04]
-        test_file: [
+        test_file:
+          [
             test_timm.py,
-            test_modeling.py, # todo: split into test_encoder, test_decoder and test_encoder_decoder
+            test_decoder.py,
+            test_modeling.py,
             test_diffusion.py,
             test_optimization.py,
             test_quantization.py,

diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py
@@ -938,19 +938,19 @@ def post_process_exported_models(
             path, models_and_onnx_configs, onnx_files_subpaths
         )
 
-        # Attempt to merge only if the decoder was exported without/with past, and ignore seq2seq models exported with text-generation task
-        if len(onnx_files_subpaths) >= 3 and self.use_past is True:
-            decoder_path = Path(path, onnx_files_subpaths[1])
-            decoder_with_past_path = Path(path, onnx_files_subpaths[2])
-            decoder_merged_path = Path(path, ONNX_DECODER_MERGED_NAME + ".onnx")
+        # Attempt to merge only if the decoder was exported without/with past
+        onnx_decoder_path = Path(path, ONNX_DECODER_NAME + ".onnx")
+        onnx_decoder_with_past_path = Path(path, ONNX_DECODER_WITH_PAST_NAME + ".onnx")
+        decoder_merged_path = Path(path, ONNX_DECODER_MERGED_NAME + ".onnx")
+        if onnx_decoder_path.is_file() and onnx_decoder_with_past_path.is_file() and self.use_past is True:
             try:
-                from ...onnx import merge_decoders
-
                 # The decoder with past does not output the cross attention past key values as they are constant,
                 # hence the need for strict=False
+                from ...onnx import merge_decoders
+
                 merge_decoders(
-                    decoder=decoder_path,
-                    decoder_with_past=decoder_with_past_path,
+                    decoder=onnx_decoder_path,
+                    decoder_with_past=onnx_decoder_with_past_path,
                     save_path=decoder_merged_path,
                     strict=False,
                 )

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
@@ -337,7 +337,7 @@ class GPTNeoXOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
 
 
 # OPT does not take position_ids as input for transformers < v4.46, needs it for transformers >= v4.46
-if is_transformers_version(">=", "4.45.99"):
+if is_transformers_version(">=", "4.46.0"):
 
     class OPTOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
         DEFAULT_ONNX_OPSET = 14  # uses SDPA in Transformers, hence opset>=14.
@@ -352,7 +352,6 @@ class OPTOnnxConfig(TextDecoderOnnxConfig):
 
 class LlamaOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
     DEFAULT_ONNX_OPSET = 14  # Llama now uses F.scaled_dot_product_attention by default for torch>=2.1.1.
-
     DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, MistralDummyPastKeyValuesGenerator)
     DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
@@ -385,7 +384,7 @@ class GraniteOnnxConfig(LlamaOnnxConfig):
 class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
     DEFAULT_ONNX_OPSET = 14  # Phi now uses F.scaled_dot_product_attention by default for torch>=2.1.1.
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
-    MIN_TRANSFORMERS_VERSION = version.parse("4.36.0")
+    MIN_TRANSFORMERS_VERSION = version.parse("4.42.0")
 
 
 class Phi3OnnxConfig(PhiOnnxConfig):
@@ -430,33 +429,11 @@ class BloomOnnxConfig(TextDecoderOnnxConfig):
     DUMMY_INPUT_GENERATOR_CLASSES = (
         BloomDummyPastKeyValuesGenerator,
     ) + TextDecoderOnnxConfig.DUMMY_INPUT_GENERATOR_CLASSES
+
+    DEFAULT_ONNX_OPSET = 14  # Bloom uses F.scaled_dot_product_attention
+    MIN_TRANSFORMERS_VERSION = version.parse("4.44.0")
     DUMMY_PKV_GENERATOR_CLASS = BloomDummyPastKeyValuesGenerator
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(num_layers="n_layer", num_attention_heads="n_head")
-    DEFAULT_ONNX_OPSET = 14  # Bloom uses aten::triu that requires opset>=14, and F.scaled_dot_product_attention
-
-    def add_past_key_values(self, inputs_or_outputs: Dict[str, Dict[int, str]], direction: str):
-        if is_transformers_version(">=", "4.44"):
-            super().add_past_key_values(inputs_or_outputs, direction)
-        else:
-            if direction not in ["inputs", "outputs"]:
-                raise ValueError(f'direction must either be "inputs" or "outputs", but {direction} was given')
-
-            if direction == "inputs":
-                decoder_sequence_name = "past_sequence_length"
-                name = "past_key_values"
-            else:
-                decoder_sequence_name = "past_sequence_length + 1"
-                name = "present"
-
-            for i in range(self._normalized_config.num_layers):
-                inputs_or_outputs[f"{name}.{i}.key"] = {
-                    0: "batch_size x num_heads",
-                    2: decoder_sequence_name,
-                }
-                inputs_or_outputs[f"{name}.{i}.value"] = {
-                    0: "batch_size x num_heads",
-                    1: decoder_sequence_name,
-                }
 
 
 class GPTBigCodeOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):

diff --git a/optimum/exporters/onnx/utils.py b/optimum/exporters/onnx/utils.py
@@ -89,7 +89,7 @@
 }
 
 
-if is_transformers_version(">=", "4.45.99"):
+if is_transformers_version(">=", "4.46.0"):
     MODEL_TYPES_REQUIRING_POSITION_IDS.add("opt")
 
 

diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py
@@ -29,7 +29,7 @@
 from transformers import AutoConfig, PretrainedConfig, is_tf_available, is_torch_available
 from transformers.utils import SAFE_WEIGHTS_NAME, TF2_WEIGHTS_NAME, WEIGHTS_NAME, http_user_agent, logging
 
-from ..utils.import_utils import is_diffusers_available, is_onnx_available
+from ..utils.import_utils import is_diffusers_available, is_onnx_available, is_transformers_version
 
 
 if TYPE_CHECKING:
@@ -1475,12 +1475,23 @@ def get_supported_model_type_for_task(task: str, exporter: str) -> List[str]:
         """
         Returns the list of supported architectures by the exporter for a given task. Transformers-specific.
         """
-        return [
+
+        supported_model_types = [
             model_type.replace("-", "_")
             for model_type in TasksManager._SUPPORTED_MODEL_TYPE
             if task in TasksManager._SUPPORTED_MODEL_TYPE[model_type][exporter]
+            and is_transformers_version(
+                ">=",
+                str(
+                    TasksManager.get_exporter_config_constructor(
+                        exporter, task=task, model_type=model_type
+                    ).func.MIN_TRANSFORMERS_VERSION
+                ),
+            )
         ]
 
+        return supported_model_types
+
     @staticmethod
     def synonyms_for_task(task: str) -> Set[str]:
         synonyms = [k for k, v in TasksManager._SYNONYM_TASK_MAP.items() if v == task]
-Original file line number
+Diff line change
@@ Expand Up / @@ -89,7 +89,7 @@ @@
     }
-    if is_transformers_version(">=", "4.45.99"):
+    if is_transformers_version(">=", "4.46.0"):
         MODEL_TYPES_REQUIRING_POSITION_IDS.add("opt")
@@ Expand Down @@