huggingface · IlyasMoutawwakil · Nov 25, 2024 · Oct 1, 2024 · Oct 11, 2024 · Oct 11, 2024
diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx
@@ -36,6 +36,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra
 - Data2VecVision
 - Deberta
 - Deberta-v2
+- Decision Transformer
 - Deit
 - Detr
 - DistilBert

diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py
@@ -173,6 +173,14 @@ class OnnxConfig(ExportConfig, ABC):
                 "end_logits": {0: "batch_size", 1: "sequence_length"},
             }
         ),
+        "reinforcement-learning": OrderedDict(
+            {
+                "return_preds": {0: "batch_size", 1: "sequence_length"},
+                "action_preds": {0: "batch_size", 1: "sequence_length", 2: "act_dim"},
+                "state_preds": {0: "batch_size", 1: "sequence_length", 2: "state_dim"},
+                "last_hidden_state": {0: "batch_size", 1: "sequence_length", 2: "last_hidden_state"},
+            }
+        ),
         "semantic-segmentation": OrderedDict({"logits": {0: "batch_size", 1: "num_labels", 2: "height", 3: "width"}}),
         "text2text-generation": OrderedDict({"logits": {0: "batch_size", 1: "decoder_sequence_length"}}),
         "text-classification": OrderedDict({"logits": {0: "batch_size"}}),

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
@@ -27,6 +27,7 @@
     DummyAudioInputGenerator,
     DummyCodegenDecoderTextInputGenerator,
     DummyDecoderTextInputGenerator,
+    DummyDecisionTransformerInputGenerator,
     DummyEncodecInputGenerator,
     DummyInputGenerator,
     DummyIntGenerator,
@@ -256,6 +257,23 @@ class ImageGPTOnnxConfig(GPT2OnnxConfig):
     pass
 
 
+class DecisionTransformerOnnxConfig(GPT2OnnxConfig):
+    """ONNX export configuration for Decision Transformer models."""
+
+    DUMMY_INPUT_GENERATOR_CLASSES = (DummyDecisionTransformerInputGenerator,)
+
+    @property
+    def inputs(self) -> Dict[str, Dict[int, str]]:
+        """
+        Returns the mapping from each model input name to its dynamic axes (axis index -> axis name).
+        """
+        # Axes shared by every input; each entry below gets its own dict copy.
+        per_step_axes = {0: "batch_size", 1: "sequence_length"}
+        return {
+            "timesteps": dict(per_step_axes),
+            "returns_to_go": dict(per_step_axes),
+            "attention_mask": dict(per_step_axes),
+            "actions": {**per_step_axes, 2: "act_dim"},
+            "states": {**per_step_axes, 2: "state_dim"},
+        }
+
+
 class GPTNeoOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
     DEFAULT_ONNX_OPSET = 14
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(num_attention_heads="num_heads")

diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py
@@ -217,6 +217,7 @@ class TasksManager:
             "multiple-choice": "AutoModelForMultipleChoice",
             "object-detection": "AutoModelForObjectDetection",
             "question-answering": "AutoModelForQuestionAnswering",
+            "reinforcement-learning": "AutoModel",
             "semantic-segmentation": "AutoModelForSemanticSegmentation",
             "text-to-audio": ("AutoModelForTextToSpectrogram", "AutoModelForTextToWaveform"),
             "text-generation": "AutoModelForCausalLM",
@@ -562,6 +563,12 @@ class TasksManager:
             onnx="DebertaV2OnnxConfig",
             tflite="DebertaV2TFLiteConfig",
         ),
+        "decision-transformer": supported_tasks_mapping(
+            "feature-extraction",
+            "feature-extraction-with-past",
+            "reinforcement-learning",
+            onnx="DecisionTransformerOnnxConfig",
+        ),
         "deit": supported_tasks_mapping(
             "feature-extraction",
             "image-classification",
@@ -2061,6 +2068,9 @@ def get_model_from_task(
             if original_task == "automatic-speech-recognition" or task == "automatic-speech-recognition":
                 if original_task == "auto" and config.architectures is not None:
                     model_class_name = config.architectures[0]
+            elif original_task == "reinforcement-learning" or task == "reinforcement-learning":
+                if config.architectures is not None:
+                    model_class_name = config.architectures[0]
 
         if library_name == "diffusers":
             config = DiffusionPipeline.load_config(model_name_or_path, **kwargs)

diff --git a/optimum/utils/__init__.py b/optimum/utils/__init__.py
@@ -49,6 +49,7 @@
     DummyAudioInputGenerator,
     DummyBboxInputGenerator,
     DummyCodegenDecoderTextInputGenerator,
+    DummyDecisionTransformerInputGenerator,
     DummyDecoderTextInputGenerator,
     DummyEncodecInputGenerator,
     DummyInputGenerator,

diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py
@@ -507,6 +507,41 @@ class DummyDecoderTextInputGenerator(DummyTextInputGenerator):
     )
 
 
+class DummyDecisionTransformerInputGenerator(DummyTextInputGenerator):
+    """
+    Generates dummy decision transformer inputs.
+
+    `act_dim`, `state_dim` and `max_ep_len` are read from the wrapped model config
+    (`normalized_config.config`). They set the last dimension of the `actions` and `states`
+    tensors and the `max_value` passed when generating `timesteps`.
+    """
+
+    SUPPORTED_INPUT_NAMES = (
+        "actions",
+        "timesteps",
+        "attention_mask",
+        "returns_to_go",
+        "states",
+    )
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        model_config = self.normalized_config.config
+        self.act_dim = model_config.act_dim
+        self.state_dim = model_config.state_dim
+        self.max_ep_len = model_config.max_ep_len
+
+    def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
+        """
+        Generates the dummy tensor for `input_name`.
+
+        Args:
+            input_name (`str`): One of `SUPPORTED_INPUT_NAMES`.
+            framework (`str`, defaults to `"pt"`): Forwarded to the random tensor helpers.
+            int_dtype (`str`, defaults to `"int64"`): Dtype used for the integer `timesteps` tensor.
+            float_dtype (`str`, defaults to `"fp32"`): Dtype used for every other input.
+
+        Returns:
+            An integer tensor for `timesteps`, otherwise a float tensor generated with
+            `min_value=-2.0` and `max_value=2.0`.
+
+        Raises:
+            ValueError: If `input_name` is not handled by this generator.
+        """
+        if input_name == "timesteps":
+            shape = [self.batch_size, self.sequence_length]
+            return self.random_int_tensor(shape=shape, max_value=self.max_ep_len, framework=framework, dtype=int_dtype)
+
+        if input_name == "states":
+            shape = [self.batch_size, self.sequence_length, self.state_dim]
+        elif input_name == "actions":
+            shape = [self.batch_size, self.sequence_length, self.act_dim]
+        elif input_name == "returns_to_go":
+            shape = [self.batch_size, self.sequence_length, 1]
+        elif input_name == "attention_mask":
+            # NOTE(review): the mask is generated as float values, not a 0/1 integer mask - confirm this is intended.
+            shape = [self.batch_size, self.sequence_length]
+        else:
+            # Without this branch `shape` would be unbound below and fail with an opaque UnboundLocalError.
+            raise ValueError(
+                f"Unsupported input name {input_name!r} for {self.__class__.__name__}, "
+                f"supported input names are: {', '.join(self.SUPPORTED_INPUT_NAMES)}."
+            )
+
+        return self.random_float_tensor(shape, min_value=-2.0, max_value=2.0, framework=framework, dtype=float_dtype)
+
+
 class DummySeq2SeqDecoderTextInputGenerator(DummyDecoderTextInputGenerator):
     SUPPORTED_INPUT_NAMES = (
         "decoder_input_ids",

diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py
@@ -85,6 +85,15 @@ class NormalizedTextConfig(NormalizedConfig):
     EOS_TOKEN_ID = "eos_token_id"
 
 
+class NormalizedDecisionTransformerConfig(NormalizedConfig):
+    """
+    Normalized attribute names for Decision Transformer configs.
+
+    Reference: https://huggingface.co/docs/transformers/model_doc/decision_transformer
+    """
+
+    # Each constant holds the name of the corresponding attribute on the model config.
+    ACT_DIM = "act_dim"
+    STATE_DIM = "state_dim"
+    MAX_EP_LEN = "max_ep_len"
+    HIDDEN_SIZE = "hidden_size"
+
+
 class NormalizedTextConfigWithGQA(NormalizedTextConfig):
     NUM_KEY_VALUE_HEADS = "num_key_value_heads"
 
@@ -236,6 +245,7 @@ class NormalizedConfigManager:
         "cvt": NormalizedVisionConfig,
         "deberta": NormalizedTextConfig,
         "deberta-v2": NormalizedTextConfig,
+        "decision-transformer": NormalizedDecisionTransformerConfig,
         "deit": NormalizedVisionConfig,
         "distilbert": NormalizedTextConfig.with_args(num_attention_heads="n_heads", hidden_size="dim"),
         "donut-swin": NormalizedVisionConfig,

diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
@@ -67,6 +67,7 @@
     "data2vec-audio": "hf-internal-testing/tiny-random-Data2VecAudioModel",
     "deberta": "hf-internal-testing/tiny-random-DebertaModel",
     "deberta-v2": "hf-internal-testing/tiny-random-DebertaV2Model",
+    "decision-transformer": "edbeeching/decision-transformer-gym-hopper-medium",
     "deit": "hf-internal-testing/tiny-random-DeiTModel",
     "donut": "fxmarty/tiny-doc-qa-vision-encoder-decoder",
     "donut-swin": "hf-internal-testing/tiny-random-DonutSwinModel",