Add ONNX export optimization support for ModernBERT · Issue #2177 · huggingface/optimum

Feature request

Release v1.24.0 successfully supports exporting a ModernBERT model to ONNX; however, this support does not extend to enabling optimizations via the --optimize flag in optimum-cli.

I'm not sure how much work is needed to support this in a more formal capacity, but a very brief local attempt at simply adding a `modernbert` entry to the mappings in:

optimum/optimum/onnxruntime/utils.py

Lines 101 to 149 in afff2fa

    
           class ORTConfigManager: 
        
               """ 
        
               A class that contains all the information needed by ONNX Runtime optimization for a given model type. 
        
               Attributes: 
        
                   _conf (`Dict[str]`): 
        
                       A dictionary mapping each supported model type to the corresponding ONNX Runtime model type. 
        
               """ 
        
               # Contribution note: Please add new models in alphabetical order 
        
               # TODO: for encoder-decoder models, validate if bert or gpt2 optimization is better 
        
               _conf = { 
        
                   "albert": "bert", 
        
                   "bart": "bart", 
        
                   "bert": "bert", 
        
                   "big-bird": "bert", 
        
                   "bigbird-pegasus": "bart", 
        
                   "blenderbot": "bert", 
        
                   "bloom": "gpt2", 
        
                   "camembert": "bert", 
        
                   "codegen": "gpt2", 
        
                   "deberta": "bert", 
        
                   "deberta-v2": "bert", 
        
                   "distilbert": "bert", 
        
                   "electra": "bert", 
        
                   "gpt2": "gpt2", 
        
                   "gpt-bigcode": "gpt2", 
        
                   "gpt-neo": "gpt2", 
        
                   "gpt-neox": "gpt2", 
        
                   "gptj": "gpt2", 
        
                   "granite": "gpt2", 
        
                   "longt5": "bert", 
        
                   "llama": "gpt2", 
        
                   "marian": "bart", 
        
                   "mbart": "bart", 
        
                   "mistral": "gpt2", 
        
                   "mpnet": "bert", 
        
                   "mt5": "bart", 
        
                   "m2m-100": "bart", 
        
                   "nystromformer": "bert", 
        
                   "pegasus": "bert", 
        
                   "roberta": "bert", 
        
                   "segformer": "vit", 
        
                   "t5": "bert", 
        
                   "vit": "vit", 
        
                   "whisper": "bart", 
        
                   "xlm-roberta": "bert", 
        
                   "pix2struct": "vit", 
        
               }

and

optimum/optimum/utils/normalized_config.py

Lines 233 to 294 in afff2fa

    
           _conf = { 
        
               "albert": NormalizedTextConfig, 
        
               "bart": BartLikeNormalizedTextConfig, 
        
               "bert": NormalizedTextConfig, 
        
               "big-bird": NormalizedTextConfig, 
        
               "bigbird-pegasus": BartLikeNormalizedTextConfig, 
        
               "blenderbot": BartLikeNormalizedTextConfig, 
        
               "blenderbot-small": BartLikeNormalizedTextConfig, 
        
               "bloom": NormalizedTextConfig.with_args(num_layers="n_layer"), 
        
               "falcon": NormalizedTextConfig, 
        
               "camembert": NormalizedTextConfig, 
        
               "codegen": GPT2LikeNormalizedTextConfig, 
        
               "cvt": NormalizedVisionConfig, 
        
               "deberta": NormalizedTextConfig, 
        
               "deberta-v2": NormalizedTextConfig, 
        
               "deit": NormalizedVisionConfig, 
        
               "distilbert": NormalizedTextConfig.with_args(num_attention_heads="n_heads", hidden_size="dim"), 
        
               "donut-swin": NormalizedVisionConfig, 
        
               "electra": NormalizedTextConfig, 
        
               "encoder-decoder": NormalizedEncoderDecoderConfig, 
        
               "gemma": NormalizedTextConfigWithGQA, 
        
               "gpt2": GPT2LikeNormalizedTextConfig, 
        
               "gpt-bigcode": GPTBigCodeNormalizedTextConfig, 
        
               "gpt-neo": NormalizedTextConfig.with_args(num_attention_heads="num_heads"), 
        
               "gpt-neox": NormalizedTextConfig, 
        
               "gptj": GPT2LikeNormalizedTextConfig, 
        
               "imagegpt": GPT2LikeNormalizedTextConfig, 
        
               "llama": NormalizedTextConfigWithGQA, 
        
               "longt5": T5LikeNormalizedTextConfig, 
        
               "marian": BartLikeNormalizedTextConfig, 
        
               "markuplm": NormalizedTextConfig, 
        
               "mbart": BartLikeNormalizedTextConfig, 
        
               "mistral": NormalizedTextConfigWithGQA, 
        
               "mixtral": NormalizedTextConfigWithGQA, 
        
               "mpnet": NormalizedTextConfig, 
        
               "mpt": MPTNormalizedTextConfig, 
        
               "mt5": T5LikeNormalizedTextConfig, 
        
               "m2m-100": BartLikeNormalizedTextConfig, 
        
               "nystromformer": NormalizedTextConfig, 
        
               "opt": NormalizedTextConfig, 
        
               "pegasus": BartLikeNormalizedTextConfig, 
        
               "pix2struct": Pix2StructNormalizedTextConfig, 
        
               "phi": NormalizedTextConfig, 
        
               "phi3": NormalizedTextConfigWithGQA, 
        
               "phi3small": NormalizedTextConfigWithGQA, 
        
               "poolformer": NormalizedVisionConfig, 
        
               "regnet": NormalizedVisionConfig, 
        
               "resnet": NormalizedVisionConfig, 
        
               "roberta": NormalizedTextConfig, 
        
               "segformer": NormalizedSegformerConfig, 
        
               "speech-to-text": SpeechToTextLikeNormalizedTextConfig, 
        
               "splinter": NormalizedTextConfig, 
        
               "t5": T5LikeNormalizedTextConfig, 
        
               "trocr": TrOCRLikeNormalizedTextConfig, 
        
               "vision-encoder-decoder": NormalizedEncoderDecoderConfig, 
        
               "vit": NormalizedVisionConfig, 
        
               "whisper": WhisperLikeNormalizedTextConfig, 
        
               "xlm-roberta": NormalizedTextConfig, 
        
               "yolos": NormalizedVisionConfig, 
        
               "qwen2": NormalizedTextConfig, 
        
               "granite": NormalizedTextConfigWithGQA, 
        
           }

with the former mapping "modernbert" to "bert" and the latter mapping it to NormalizedTextConfig, seemed to allow me to export the model with optimizations. In my brief testing after that, I didn't notice any glaring issues with the output, and I observed some of the expected speedups.

Motivation

I would like to export an optimized ONNX version of my ModernBERT model.

Your contribution

I'd be happy to submit a PR if given more information on how this support is typically added.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add ONNX export optimization support for ModernBERT #2177

Feature request

Motivation

Your contribution

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

	class ORTConfigManager:
	"""
	A class that contains all the information needed by ONNX Runtime optimization for a given model type.

	Attributes:
	_conf (`Dict[str]`):
	A dictionary mapping each supported model type to the corresponding ONNX Runtime model type.
	"""

	# Contribution note: Please add new models in alphabetical order
	# TODO: for encoder-decoder models, validate if bert or gpt2 optimization is better
	_conf = {
	"albert": "bert",
	"bart": "bart",
	"bert": "bert",
	"big-bird": "bert",
	"bigbird-pegasus": "bart",
	"blenderbot": "bert",
	"bloom": "gpt2",
	"camembert": "bert",
	"codegen": "gpt2",
	"deberta": "bert",
	"deberta-v2": "bert",
	"distilbert": "bert",
	"electra": "bert",
	"gpt2": "gpt2",
	"gpt-bigcode": "gpt2",
	"gpt-neo": "gpt2",
	"gpt-neox": "gpt2",
	"gptj": "gpt2",
	"granite": "gpt2",
	"longt5": "bert",
	"llama": "gpt2",
	"marian": "bart",
	"mbart": "bart",
	"mistral": "gpt2",
	"mpnet": "bert",
	"mt5": "bart",
	"m2m-100": "bart",
	"nystromformer": "bert",
	"pegasus": "bert",
	"roberta": "bert",
	"segformer": "vit",
	"t5": "bert",
	"vit": "vit",
	"whisper": "bart",
	"xlm-roberta": "bert",
	"pix2struct": "vit",
	}

	_conf = {
	"albert": NormalizedTextConfig,
	"bart": BartLikeNormalizedTextConfig,
	"bert": NormalizedTextConfig,
	"big-bird": NormalizedTextConfig,
	"bigbird-pegasus": BartLikeNormalizedTextConfig,
	"blenderbot": BartLikeNormalizedTextConfig,
	"blenderbot-small": BartLikeNormalizedTextConfig,
	"bloom": NormalizedTextConfig.with_args(num_layers="n_layer"),
	"falcon": NormalizedTextConfig,
	"camembert": NormalizedTextConfig,
	"codegen": GPT2LikeNormalizedTextConfig,
	"cvt": NormalizedVisionConfig,
	"deberta": NormalizedTextConfig,
	"deberta-v2": NormalizedTextConfig,
	"deit": NormalizedVisionConfig,
	"distilbert": NormalizedTextConfig.with_args(num_attention_heads="n_heads", hidden_size="dim"),
	"donut-swin": NormalizedVisionConfig,
	"electra": NormalizedTextConfig,
	"encoder-decoder": NormalizedEncoderDecoderConfig,
	"gemma": NormalizedTextConfigWithGQA,
	"gpt2": GPT2LikeNormalizedTextConfig,
	"gpt-bigcode": GPTBigCodeNormalizedTextConfig,
	"gpt-neo": NormalizedTextConfig.with_args(num_attention_heads="num_heads"),
	"gpt-neox": NormalizedTextConfig,
	"gptj": GPT2LikeNormalizedTextConfig,
	"imagegpt": GPT2LikeNormalizedTextConfig,
	"llama": NormalizedTextConfigWithGQA,
	"longt5": T5LikeNormalizedTextConfig,
	"marian": BartLikeNormalizedTextConfig,
	"markuplm": NormalizedTextConfig,
	"mbart": BartLikeNormalizedTextConfig,
	"mistral": NormalizedTextConfigWithGQA,
	"mixtral": NormalizedTextConfigWithGQA,
	"mpnet": NormalizedTextConfig,
	"mpt": MPTNormalizedTextConfig,
	"mt5": T5LikeNormalizedTextConfig,
	"m2m-100": BartLikeNormalizedTextConfig,
	"nystromformer": NormalizedTextConfig,
	"opt": NormalizedTextConfig,
	"pegasus": BartLikeNormalizedTextConfig,
	"pix2struct": Pix2StructNormalizedTextConfig,
	"phi": NormalizedTextConfig,
	"phi3": NormalizedTextConfigWithGQA,
	"phi3small": NormalizedTextConfigWithGQA,
	"poolformer": NormalizedVisionConfig,
	"regnet": NormalizedVisionConfig,
	"resnet": NormalizedVisionConfig,
	"roberta": NormalizedTextConfig,
	"segformer": NormalizedSegformerConfig,
	"speech-to-text": SpeechToTextLikeNormalizedTextConfig,
	"splinter": NormalizedTextConfig,
	"t5": T5LikeNormalizedTextConfig,
	"trocr": TrOCRLikeNormalizedTextConfig,
	"vision-encoder-decoder": NormalizedEncoderDecoderConfig,
	"vit": NormalizedVisionConfig,
	"whisper": WhisperLikeNormalizedTextConfig,
	"xlm-roberta": NormalizedTextConfig,
	"yolos": NormalizedVisionConfig,
	"qwen2": NormalizedTextConfig,
	"granite": NormalizedTextConfigWithGQA,
	}

Add ONNX export optimization support for ModernBERT #2177

Description

Feature request

Motivation

Your contribution

Metadata

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Issue actions