From 9eeaac7d230695dc75dafc826c43307eb1dcbf72 Mon Sep 17 00:00:00 2001 From: eaidova Date: Thu, 13 Jul 2023 10:27:10 +0400 Subject: [PATCH 01/38] switch on pytorch frontend --- optimum/intel/openvino/export.py | 286 +++++++++++++++++++ optimum/intel/openvino/modeling_base.py | 8 +- optimum/intel/openvino/modeling_decoder.py | 8 +- optimum/intel/openvino/modeling_diffusion.py | 3 +- optimum/intel/openvino/trainer.py | 2 +- 5 files changed, 298 insertions(+), 9 deletions(-) create mode 100644 optimum/intel/openvino/export.py diff --git a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py new file mode 100644 index 0000000000..24636f7266 --- /dev/null +++ b/optimum/intel/openvino/export.py @@ -0,0 +1,286 @@ +import os +import logging +import inspect +from inspect import signature +from itertools import chain +from pathlib import Path +from typing import Dict, Iterable, List, Optional, Tuple, Union +import time + +import numpy as np +from transformers.utils import is_tf_available, is_torch_available + +from optimum.utils import TORCH_MINIMUM_VERSION, is_diffusers_available, is_torch_onnx_support_available, logging +from optimum.exporters.onnx.base import OnnxConfig +from optimum.exporters.onnx.convert import export_tensorflow, check_dummy_inputs_are_allowed + +from openvino.tools import mo +from openvino.runtime import serialize, PartialShape +from openvino.runtime.utils.types import get_element_type +from .utils import OV_XML_FILE_NAME + +if is_torch_available(): + import torch + import torch.nn as nn + from transformers.modeling_utils import PreTrainedModel + from transformers.pytorch_utils import is_torch_less_than_1_11 + +if is_diffusers_available(): + from diffusers import ModelMixin + +if is_tf_available(): + from transformers.modeling_tf_utils import TFPreTrainedModel + +def is_torch_model(model): + if not is_torch_available(): + return False + return isinstance(model, nn.Module) + +def export( + model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], + config: OnnxConfig, + output: Path, + opset: Optional[int] = None, + device: str = "cpu", + input_shapes: Optional[Dict] = None, +) -> Tuple[List[str], List[str]]: + """ + Exports a Pytorch or TensorFlow model to an ONNX Intermediate Representation. + + Args: + model ([`PreTrainedModel`] or [`TFPreTrainedModel`]): + The model to export. + config ([`~exporters.onnx.config.OnnxConfig`]): + The ONNX configuration associated with the exported model. + output (`Path`): + Directory to store the exported ONNX model. + opset (`Optional[int]`, defaults to `None`): + The version of the ONNX operator set to use. + device (`str`, *optional*, defaults to `cpu`): + The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for + export on CUDA devices. + input_shapes (`Optional[Dict]`, defaults to `None`): + If specified, allows to use specific shapes for the example input provided to the ONNX exporter. + + Returns: + `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from + the ONNX configuration. + """ + if not (is_torch_available() or is_tf_available()): + raise ImportError( + "Cannot convert because neither PyTorch nor TensorFlow are installed. " + "Please install torch or tensorflow first." 
+ ) + + if "diffusers" in str(model.__class__) and not is_diffusers_available(): + raise ImportError("The pip package `diffusers` is required to export stable diffusion models to ONNX.") + + if is_torch_available() and isinstance(model, nn.Module): + return export_pytorch(model, config, opset, output, device=device, input_shapes=input_shapes) + + elif is_tf_available() and issubclass(type(model), TFPreTrainedModel): + output.parent.mkdir(parents=True, exist_ok=True) + if opset is None: + opset = config.DEFAULT_ONNX_OPSET + if device == "cuda": + raise RuntimeError("`tf2onnx` does not support export on CUDA device.") + if input_shapes is not None: + print("`input_shapes` argument is not supported by the Tensorflow ONNX export and will be ignored.") + return export_tensorflow(model, config, opset, output) + + else: + raise RuntimeError( + "You either provided a PyTorch model with only TensorFlow installed, or a TensorFlow model with only PyTorch installed." + ) + + +def export_pytorch( + model: Union["PreTrainedModel", "ModelMixin"], + config: OnnxConfig, + opset: int, + output: Path, + device: str = "cpu", + input_shapes: Optional[Dict] = None, +) -> Tuple[List[str], List[str]]: + """ + Exports a PyTorch model to an ONNX Intermediate Representation. + + Args: + model ([`PreTrainedModel`]): + The model to export. + config ([`~exporters.onnx.config.OnnxConfig`]): + The ONNX configuration associated with the exported model. + opset (`int`): + The version of the ONNX operator set to use. + output (`Path`): + Directory to store the exported ONNX model. + device (`str`, defaults to `"cpu"`): + The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for + export on CUDA devices. + input_shapes (`optional[Dict]`, defaults to `None`): + If specified, allows to use specific shapes for the example input provided to the ONNX exporter. + + Returns: + `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from + the ONNX configuration. 
+ """ + import torch + from torch.onnx import export as onnx_export + from torch.utils._pytree import tree_map + + print(f"Using framework PyTorch: {torch.__version__}") + + with torch.no_grad(): + model.config.return_dict = True + model.config.torchscript = True + model.eval() + + # Check if we need to override certain configuration item + if config.values_override is not None: + print(f"Overriding {len(config.values_override)} configuration item(s)") + for override_config_key, override_config_value in config.values_override.items(): + print(f"\t- {override_config_key} -> {override_config_value}") + setattr(model.config, override_config_key, override_config_value) + + if input_shapes is None: + input_shapes = {} # will use the defaults from DEFAULT_DUMMY_SHAPES + + # Check that inputs match, and order them properly + dummy_inputs = config.generate_dummy_inputs(framework="pt", **input_shapes) + device = torch.device(device) + if device.type == "cuda" and torch.cuda.is_available(): + model.to(device) + dummy_inputs = tree_map( + lambda value: value.to(device) if isinstance(value, torch.Tensor) else value, dummy_inputs + ) + check_dummy_inputs_are_allowed(model, dummy_inputs) + inputs = config.ordered_inputs(model) + input_names = list(inputs.keys()) + output_names = list(config.outputs.keys()) + + if hasattr(config, "patch_ops"): + config.patch_ops() + + if hasattr(model, "forward"): + sig = inspect.signature(model.forward) + else: + sig = inspect.signature(model.call) + + input_info = get_input_shapes(dummy_inputs, inputs) + start0 = time.perf_counter() + ov_model = mo.convert_model(model, example_input=dummy_inputs, input=input_info) + end0 = time.perf_counter() + print(f"Convert model took {end0 - start0}s") + ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} + ordered_input_names = list(inputs) + flatten_inputs = flattenize_inputs(ordered_dummy_inputs.values()) + + for idx, inp_tensor in enumerate(ov_model.inputs): + input_name = ordered_input_names[idx] + inp_tensor.get_tensor().set_names({input_name}) + inp_data = flatten_inputs[idx] + static_shape = PartialShape(inp_data.shape) + dims = inputs[input_name] + + for dim in dims: + static_shape[dim] = -1 + inp_tensor.get_node().set_partial_shape(static_shape) + inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) + + for idx, out_tensor in enumerate(ov_model.outputs): + if idx < len(output_names): + out_tensor.get_tensor().set_names({output_names[idx]}) + ov_model.validate_nodes_and_infer_types() + start1 = time.perf_counter() + serialize(ov_model, output.parent / OV_XML_FILE_NAME) + end1 = time.perf_counter() + print(f"Serailize model took {end1 - start1}s") + if hasattr(config, "restore_ops"): + config.restore_ops() + + return input_names, output_names + + +def export_models( + models_and_onnx_configs: Dict[ + str, Tuple[Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], "OnnxConfig"] + ], + output_dir: Path, + opset: Optional[int] = None, + output_names: Optional[List[str]] = None, + device: str = "cpu", + input_shapes: Optional[Dict] = None, +) -> Tuple[List[List[str]], List[List[str]]]: + """ + Exports a Pytorch or TensorFlow encoder decoder model to an ONNX Intermediate Representation. + The following method exports the encoder and decoder components of the model as separate + ONNX files. 
+ + Args: + models_and_onnx_configs (`Dict[str, Tuple[Union[`PreTrainedModel`, `TFPreTrainedModel`], `OnnxConfig`]]): + A dictionnary containing the models to export and their corresponding onnx configs. + output_dir (`Path`): + Output directory to store the exported ONNX models. + opset (`Optional[int]`, defaults to `None`): + The version of the ONNX operator set to use. + output_names (`Optional[List[str]]`, defaults to `None`): + The names to use for the exported ONNX files. The order must be the same as the order of submodels in the ordered dict `models_and_onnx_configs`. + If None, will use the keys from `models_and_onnx_configs` as names. + device (`str`, defaults to `"cpu"`): + The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for + export on CUDA devices. + input_shapes (`Optional[Dict]`, defaults to `None`): + If specified, allows to use specific shapes for the example input provided to the ONNX exporter. + Returns: + `Tuple[List[List[str]], List[List[str]]]`: A tuple with an ordered list of the model's inputs, and the named + inputs from the ONNX configuration. + """ + outputs = [] + + if output_names is not None and len(output_names) != len(models_and_onnx_configs): + raise ValueError( + f"Provided custom names {output_names} for the export of {len(models_and_onnx_configs)} models. Please provide the same number of names as models to export." + ) + + for i, model_name in enumerate(models_and_onnx_configs.keys()): + submodel, sub_onnx_config = models_and_onnx_configs[model_name] + output_name = output_names[i] if output_names is not None else Path(model_name + ".xml") + output_path = output_dir / output_name + output_path.parent.mkdir(parents=True, exist_ok=True) + outputs.append( + export( + model=submodel, + config=sub_onnx_config, + output=output_path, + opset=opset, + device=device, + input_shapes=input_shapes, + ) + ) + + outputs = list(map(list, zip(*outputs))) + return outputs + + +def flattenize_inputs(inputs): + flatten_inputs = [] + for input_data in inputs: + if isinstance(input_data, (list, tuple)): + flatten_inputs.extend(flattenize_inputs(input_data)) + else: + flatten_inputs.append(input_data) + return flatten_inputs + + +def get_input_shapes(dummy_inputs, inputs): + input_info = [] + for input_name, data in dummy_inputs.items(): + if isinstance(data, (tuple, list)): + return None + static_shape = PartialShape(data.shape) + if input_name in inputs: + dynamic_dims = inputs[input_name] + for dim in dynamic_dims: + static_shape[dim] = -1 + input_info.append((input_name, static_shape)) + return input_info \ No newline at end of file diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index 14ac76137f..bc1bf6cbbd 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -21,12 +21,14 @@ import openvino from huggingface_hub import hf_hub_download from openvino._offline_transformations import apply_moc_transformations, compress_model_transformation +from openvino.tools import mo from openvino.runtime import Core from transformers import PretrainedConfig from transformers.file_utils import add_start_docstrings -from optimum.exporters.onnx import OnnxConfig, export +from optimum.exporters.onnx import OnnxConfig from optimum.exporters.tasks import TasksManager +from .export import export, is_torch_model from optimum.modeling_base import OptimizedModel from ..utils.import_utils import is_transformers_version @@ -130,7 +132,7 @@ def 
fix_op_names_duplicates(model: openvino.runtime.Model): file_name = Path(file_name) bin_file_name = file_name.with_suffix(".bin") if file_name.suffix == ".xml" else None - model = core.read_model(file_name, bin_file_name) + model = core.read_model(file_name, bin_file_name) if not file_name.suffix == ".onnx" else mo.convert_model(file_name) if file_name.suffix == ".onnx": model = fix_op_names_duplicates(model) # should be called during model conversion to IR @@ -315,7 +317,7 @@ def _to_onnx_to_load( return cls._from_pretrained( model_id=save_dir_path, config=config, - from_onnx=True, + from_onnx=not is_torch_model(model), use_auth_token=use_auth_token, revision=revision, force_download=force_download, diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index 16cf6c20d3..01c3ffae26 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -27,8 +27,8 @@ from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward from transformers.modeling_outputs import CausalLMOutputWithPast +from optimum.exporters import TasksManager from optimum.exporters.onnx import export -from optimum.exporters.tasks import TasksManager from optimum.utils import NormalizedConfigManager from ..utils.import_utils import is_transformers_version @@ -234,7 +234,7 @@ def _from_transformers( # TODO : create ModelPatcher to patch each architecture if config.model_type == "bloom": model.transformer._prepare_attn_mask = _prepare_attn_mask - elif config.model_type == "llama": + elif config.model_type in {"llama", "longllama"}: model.model._prepare_decoder_attention_mask = _prepare_decoder_attention_mask elif config.model_type in {"blenderbot-small", "blenderbot", "opt", "pegasus", "bart"}: model.model.decoder._prepare_decoder_attention_mask = _prepare_decoder_attention_mask @@ -245,12 +245,12 @@ def _from_transformers( return cls._from_pretrained( model_id=save_dir_path, config=config, - from_onnx=True, + from_onnx=not is_torch_model(model), use_auth_token=use_auth_token, revision=revision, force_download=force_download, cache_dir=cache_dir, - file_name=ONNX_WEIGHTS_NAME, + file_name=ONNX_WEIGHTS_NAME if not is_torch_model(model) else OV_XML_FILE_NAME, local_files_only=local_files_only, use_cache=use_cache, **kwargs, diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index 73ec66d473..4c54e69614 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -51,6 +51,7 @@ ) from .loaders import OVTextualInversionLoaderMixin +from .export import export_models from .modeling_base import OVBaseModel from .utils import ONNX_WEIGHTS_NAME, OV_TO_NP_TYPE, OV_XML_FILE_NAME @@ -312,7 +313,7 @@ def _from_transformers( return cls._from_pretrained( model_id=save_dir_path, config=config, - from_onnx=True, + from_onnx=False, use_auth_token=use_auth_token, revision=revision, force_download=force_download, diff --git a/optimum/intel/openvino/trainer.py b/optimum/intel/openvino/trainer.py index 811309806a..091d28047a 100644 --- a/optimum/intel/openvino/trainer.py +++ b/optimum/intel/openvino/trainer.py @@ -41,7 +41,7 @@ from nncf.torch.quantization.algo import QuantizationController from openvino._offline_transformations import compress_quantize_weights_transformation from openvino.runtime import Core, PartialShape, serialize -from openvino.tools.mo.back.offline_transformations import ( +from 
openvino.tools.ovc.moc_frontend.offline_transformations import ( apply_fused_names_cleanup, apply_moc_transformations, apply_user_transformations, From 680e383cd8c1c9e6b5453e926b75f9f75e458ecd Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 19 Jul 2023 11:41:07 +0400 Subject: [PATCH 02/38] fixes for seq2seq --- optimum/intel/openvino/export.py | 74 +++++++++++++------ optimum/intel/openvino/modeling_base.py | 5 +- .../intel/openvino/modeling_base_seq2seq.py | 13 ++-- optimum/intel/openvino/modeling_decoder.py | 5 ++ optimum/intel/openvino/modeling_seq2seq.py | 1 + 5 files changed, 67 insertions(+), 31 deletions(-) diff --git a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py index 24636f7266..a2012954bf 100644 --- a/optimum/intel/openvino/export.py +++ b/optimum/intel/openvino/export.py @@ -1,29 +1,24 @@ -import os -import logging import inspect -from inspect import signature -from itertools import chain from pathlib import Path -from typing import Dict, Iterable, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple, Union, Any +import functools import time -import numpy as np from transformers.utils import is_tf_available, is_torch_available -from optimum.utils import TORCH_MINIMUM_VERSION, is_diffusers_available, is_torch_onnx_support_available, logging +from optimum.utils import is_diffusers_available from optimum.exporters.onnx.base import OnnxConfig from optimum.exporters.onnx.convert import export_tensorflow, check_dummy_inputs_are_allowed +from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx -from openvino.tools import mo +from openvino.tools.mo import convert_model from openvino.runtime import serialize, PartialShape from openvino.runtime.utils.types import get_element_type from .utils import OV_XML_FILE_NAME if is_torch_available(): - import torch import torch.nn as nn from transformers.modeling_utils import PreTrainedModel - from transformers.pytorch_utils import is_torch_less_than_1_11 if is_diffusers_available(): from diffusers import ModelMixin @@ -101,6 +96,7 @@ def export_pytorch( output: Path, device: str = "cpu", input_shapes: Optional[Dict] = None, + model_kwargs: Optional[Dict[str, Any]] = None, ) -> Tuple[List[str], List[str]]: """ Exports a PyTorch model to an ONNX Intermediate Representation. 
@@ -132,7 +128,8 @@ def export_pytorch( with torch.no_grad(): model.config.return_dict = True - model.config.torchscript = True + custom_patcher = type(config).patch_model_for_export != OnnxConfig.patch_model_for_export + model.config.torchscript = not custom_patcher model.eval() # Check if we need to override certain configuration item @@ -157,23 +154,42 @@ def export_pytorch( inputs = config.ordered_inputs(model) input_names = list(inputs.keys()) output_names = list(config.outputs.keys()) - - if hasattr(config, "patch_ops"): - config.patch_ops() - if hasattr(model, "forward"): sig = inspect.signature(model.forward) else: sig = inspect.signature(model.call) + dummy_inputs = remove_none_from_dummy_inputs(dummy_inputs) input_info = get_input_shapes(dummy_inputs, inputs) start0 = time.perf_counter() - ov_model = mo.convert_model(model, example_input=dummy_inputs, input=input_info) + try: + if custom_patcher: + patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs) + patched_forward = patcher.patched_forward + @functools.wraps(patched_forward) + def ts_patched_forward(*args, **kwargs): + outputs = patched_forward(*args, **kwargs) + return tuple(outputs.values()) + patcher.patched_forward = ts_patched_forward + with patcher: + ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) + else: + ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) + except Exception: + onnx_output = output.with_suffix(".onnx") + input_names, output_names = export_pytorch_to_onnx(model, config, opset, onnx_output, device, input_shapes, model_kwargs) + ov_model = convert_model(onnx_output) + serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) + return input_names, output_names + end0 = time.perf_counter() print(f"Convert model took {end0 - start0}s") ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} ordered_input_names = list(inputs) flatten_inputs = flattenize_inputs(ordered_dummy_inputs.values()) + for idx, out_tensor in enumerate(ov_model.outputs): + if idx < len(output_names): + out_tensor.get_tensor().set_names({output_names[idx]}) for idx, inp_tensor in enumerate(ov_model.inputs): input_name = ordered_input_names[idx] @@ -186,18 +202,11 @@ def export_pytorch( static_shape[dim] = -1 inp_tensor.get_node().set_partial_shape(static_shape) inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) - - for idx, out_tensor in enumerate(ov_model.outputs): - if idx < len(output_names): - out_tensor.get_tensor().set_names({output_names[idx]}) ov_model.validate_nodes_and_infer_types() start1 = time.perf_counter() - serialize(ov_model, output.parent / OV_XML_FILE_NAME) + serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) end1 = time.perf_counter() print(f"Serailize model took {end1 - start1}s") - if hasattr(config, "restore_ops"): - config.restore_ops() - return input_names, output_names @@ -265,6 +274,8 @@ def export_models( def flattenize_inputs(inputs): flatten_inputs = [] for input_data in inputs: + if input_data is None: + continue if isinstance(input_data, (list, tuple)): flatten_inputs.extend(flattenize_inputs(input_data)) else: @@ -272,6 +283,21 @@ def flattenize_inputs(inputs): return flatten_inputs +def remove_none_from_dummy_inputs(dummy_inputs): + def remove_none_from_list_tuple(item): + new_item = [i for i in item if i is not None] + return type(item)(new_item) + + upd_dummy = {} + 
for k, v in dummy_inputs.items(): + if v is None: + continue + if isinstance(v, (tuple, list)): + upd_dummy[k] = remove_none_from_list_tuple(v) + continue + upd_dummy[k] = v + return upd_dummy + def get_input_shapes(dummy_inputs, inputs): input_info = [] for input_name, data in dummy_inputs.items(): diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index bc1bf6cbbd..71e74b154e 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -17,6 +17,7 @@ from pathlib import Path from tempfile import TemporaryDirectory from typing import Dict, Optional, Union +import time import openvino from huggingface_hub import hf_hub_download @@ -131,8 +132,10 @@ def fix_op_names_duplicates(model: openvino.runtime.Model): if isinstance(file_name, str): file_name = Path(file_name) bin_file_name = file_name.with_suffix(".bin") if file_name.suffix == ".xml" else None - + s = time.perf_counter() model = core.read_model(file_name, bin_file_name) if not file_name.suffix == ".onnx" else mo.convert_model(file_name) + e = time.perf_counter() + print(f"Read model took {e - s}s") if file_name.suffix == ".onnx": model = fix_op_names_duplicates(model) # should be called during model conversion to IR diff --git a/optimum/intel/openvino/modeling_base_seq2seq.py b/optimum/intel/openvino/modeling_base_seq2seq.py index a8ce3d0bf5..af5e2388f8 100644 --- a/optimum/intel/openvino/modeling_base_seq2seq.py +++ b/optimum/intel/openvino/modeling_base_seq2seq.py @@ -24,8 +24,9 @@ from transformers import PretrainedConfig from transformers.file_utils import add_start_docstrings -from optimum.exporters.onnx import export_models, get_encoder_decoder_models_for_export -from optimum.exporters.tasks import TasksManager +from optimum.exporters import TasksManager +from optimum.exporters.onnx import get_encoder_decoder_models_for_export +from .export import export_models from ..utils.import_utils import is_transformers_version from .modeling_base import OVBaseModel @@ -243,9 +244,6 @@ def _from_transformers( kwargs (`Dict`, *optional*): kwargs will be passed to the model during initialization """ - encoder_file_name = os.path.join("encoder", ONNX_ENCODER_NAME) - decoder_file_name = os.path.join("decoder", ONNX_DECODER_NAME) - decoder_with_past_file_name = os.path.join("decoder_with_past", ONNX_DECODER_WITH_PAST_NAME) task = task or cls.export_feature save_dir = TemporaryDirectory() @@ -265,6 +263,9 @@ def _from_transformers( onnx_config_constructor = TasksManager.get_exporter_config_constructor(model=model, exporter="onnx", task=task) onnx_config = onnx_config_constructor(model.config, use_past=use_cache) models_and_onnx_configs = get_encoder_decoder_models_for_export(model, onnx_config) + encoder_file_name = os.path.join("encoder", OV_ENCODER_NAME) + decoder_file_name = os.path.join("decoder", OV_DECODER_NAME) + decoder_with_past_file_name = os.path.join("decoder_with_past", OV_DECODER_WITH_PAST_NAME) output_names = [encoder_file_name, decoder_file_name] if use_cache is True: @@ -281,7 +282,7 @@ def _from_transformers( model_id=save_dir_path, config=config, use_cache=use_cache, - from_onnx=True, + from_onnx=False, use_auth_token=use_auth_token, revision=revision, force_download=force_download, diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index 01c3ffae26..cf2100bac3 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -17,6 +17,7 @@ from 
pathlib import Path from tempfile import TemporaryDirectory from typing import Dict, Optional, Tuple, Union +import time import numpy as np import openvino @@ -30,6 +31,7 @@ from optimum.exporters import TasksManager from optimum.exporters.onnx import export from optimum.utils import NormalizedConfigManager +#from optimum.exporters.onnx import export from ..utils.import_utils import is_transformers_version from ..utils.modeling_utils import _prepare_attn_mask, _prepare_decoder_attention_mask @@ -225,7 +227,10 @@ def _from_transformers( "force_download": force_download, "trust_remote_code": trust_remote_code, } + start0 = time.perf_counter() model = TasksManager.get_model_from_task(task, model_id, **model_kwargs) + end0 = time.perf_counter() + print(f"Reading PT model took {end0 - start0}") config.is_decoder = True config.is_encoder_decoder = False onnx_config_constructor = TasksManager.get_exporter_config_constructor(model=model, exporter="onnx", task=task) diff --git a/optimum/intel/openvino/modeling_seq2seq.py b/optimum/intel/openvino/modeling_seq2seq.py index 0f52335639..9994b70d64 100644 --- a/optimum/intel/openvino/modeling_seq2seq.py +++ b/optimum/intel/openvino/modeling_seq2seq.py @@ -413,6 +413,7 @@ def forward( if "encoder_hidden_states" in self.input_names and encoder_hidden_states is not None: inputs["encoder_hidden_states"] = encoder_hidden_states + print(self.model) # Run inference self.request.start_async(inputs, shared_memory=True) self.request.wait() From a4d7d265dca4525256a38cdce5374e1a80bce5f7 Mon Sep 17 00:00:00 2001 From: eaidova Date: Fri, 28 Jul 2023 08:33:38 +0400 Subject: [PATCH 03/38] wip --- optimum/intel/openvino/export.py | 328 ++++++++++++++++++- optimum/intel/openvino/modeling_decoder.py | 3 +- optimum/intel/openvino/modeling_diffusion.py | 2 +- 3 files changed, 324 insertions(+), 9 deletions(-) diff --git a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py index a2012954bf..bf5cc7b933 100644 --- a/optimum/intel/openvino/export.py +++ b/optimum/intel/openvino/export.py @@ -1,15 +1,20 @@ import inspect +import os from pathlib import Path -from typing import Dict, List, Optional, Tuple, Union, Any +from typing import Dict, List, Optional, Tuple, Union, Any, Callable import functools import time from transformers.utils import is_tf_available, is_torch_available +from transformers import AutoTokenizer -from optimum.utils import is_diffusers_available -from optimum.exporters.onnx.base import OnnxConfig +from optimum.utils import is_diffusers_available, DEFAULT_DUMMY_SHAPES +from optimum.exporters import TasksManager +from optimum.exporters.onnx.base import OnnxConfig, OnnxConfigWithPast from optimum.exporters.onnx.convert import export_tensorflow, check_dummy_inputs_are_allowed from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx +from optimum.utils.save_utils import maybe_save_preprocessors +from optimum.exporters.onnx import __main__ from openvino.tools.mo import convert_model from openvino.runtime import serialize, PartialShape @@ -38,6 +43,7 @@ def export( opset: Optional[int] = None, device: str = "cpu", input_shapes: Optional[Dict] = None, + model_kwargs: Optional[Dict[str, Any]] = None, ) -> Tuple[List[str], List[str]]: """ Exports a Pytorch or TensorFlow model to an ONNX Intermediate Representation. 
@@ -71,7 +77,7 @@ def export( raise ImportError("The pip package `diffusers` is required to export stable diffusion models to ONNX.") if is_torch_available() and isinstance(model, nn.Module): - return export_pytorch(model, config, opset, output, device=device, input_shapes=input_shapes) + return export_pytorch(model, config, opset, output, device=device, input_shapes=input_shapes, model_kwargs=model_kwargs) elif is_tf_available() and issubclass(type(model), TFPreTrainedModel): output.parent.mkdir(parents=True, exist_ok=True) @@ -219,6 +225,7 @@ def export_models( output_names: Optional[List[str]] = None, device: str = "cpu", input_shapes: Optional[Dict] = None, + model_kwargs: Optional[Dict[str, Any]] = None, ) -> Tuple[List[List[str]], List[List[str]]]: """ Exports a Pytorch or TensorFlow encoder decoder model to an ONNX Intermediate Representation. @@ -264,6 +271,7 @@ def export_models( opset=opset, device=device, input_shapes=input_shapes, + model_kwargs=model_kwargs, ) ) @@ -292,6 +300,10 @@ def remove_none_from_list_tuple(item): for k, v in dummy_inputs.items(): if v is None: continue + if isinstance(v, dict): + for kk, vv in v.items(): + upd_dummy[kk] = vv + continue if isinstance(v, (tuple, list)): upd_dummy[k] = remove_none_from_list_tuple(v) continue @@ -301,7 +313,7 @@ def remove_none_from_list_tuple(item): def get_input_shapes(dummy_inputs, inputs): input_info = [] for input_name, data in dummy_inputs.items(): - if isinstance(data, (tuple, list)): + if isinstance(data, (tuple, list, dict)): return None static_shape = PartialShape(data.shape) if input_name in inputs: @@ -309,4 +321,308 @@ def get_input_shapes(dummy_inputs, inputs): for dim in dynamic_dims: static_shape[dim] = -1 input_info.append((input_name, static_shape)) - return input_info \ No newline at end of file + return input_info + + +def main_export( + model_name_or_path: str, + output: Union[str, Path], + task: str = "auto", + device: str = "cpu", + fp16: Optional[bool] = False, + optimize: Optional[str] = None, + monolith: bool = False, + framework: Optional[str] = None, + cache_dir: Optional[str] = None, + trust_remote_code: bool = False, + pad_token_id: Optional[int] = None, + subfolder: str = "", + revision: str = "main", + force_download: bool = False, + local_files_only: bool = False, + use_auth_token: Optional[Union[bool, str]] = None, + for_ort: bool = False, + model_kwargs: Optional[Dict[str, Any]] = None, + custom_onnx_configs: Optional[Dict[str, "OnnxConfig"]] = None, + fn_get_submodels: Optional[Callable] = None, + **kwargs_shapes, +): + """ + Full-suite ONNX export. + + Args: + > Required parameters + + model_name_or_path (`str`): + Model ID on huggingface.co or path on disk to the model repository to export. + output (`Union[str, Path]`): + Path indicating the directory where to store the generated ONNX model. + + > Optional parameters + + task (`Optional[str]`, defaults to `None`): + The task to export the model for. If not specified, the task will be auto-inferred based on the model. For decoder models, + use `xxx-with-past` to export the model using past key values in the decoder. + opset (`Optional[int]`, defaults to `None`): + If specified, ONNX opset version to export the model with. Otherwise, the default opset for the given model architecture + will be used. + device (`str`, defaults to `"cpu"`): + The device to use to do the export. Defaults to "cpu". + fp16 (`Optional[bool]`, defaults to `"False"`): + Use half precision during the export. PyTorch-only, requires `device="cuda"`. 
+ optimize (`Optional[str]`, defaults to `None`): + Allows to run ONNX Runtime optimizations directly during the export. Some of these optimizations are specific to + ONNX Runtime, and the resulting ONNX will not be usable with other runtime as OpenVINO or TensorRT. + Available options: `"O1", "O2", "O3", "O4"`. Reference: [`~optimum.onnxruntime.AutoOptimizationConfig`] + monolith (`bool`, defaults to `False`): + Forces to export the model as a single ONNX file. + no_post_process (`bool`, defaults to `False`): + Allows to disable any post-processing done by default on the exported ONNX models. + framework (`Optional[str]`, defaults to `None`): + The framework to use for the ONNX export (`"pt"` or `"tf"`). If not provided, will attempt to automatically detect + the framework for the checkpoint. + atol (`Optional[float]`, defaults to `None`): + If specified, the absolute difference tolerance when validating the model. Otherwise, the default atol for the model will be used. + cache_dir (`Optional[str]`, defaults to `None`): + Path indicating where to store cache. The default Hugging Face cache path will be used by default. + trust_remote_code (`bool`, defaults to `False`): + Allows to use custom code for the modeling hosted in the model repository. This option should only be set for repositories + you trust and in which you have read the code, as it will execute on your local machine arbitrary code present in the + model repository. + pad_token_id (`Optional[int]`, defaults to `None`): + This is needed by some models, for some tasks. If not provided, will attempt to use the tokenizer to guess it. + subfolder (`str`, defaults to `""`): + In case the relevant files are located inside a subfolder of the model repo either locally or on huggingface.co, you can + specify the folder name here. + revision (`str`, defaults to `"main"`): + Revision is the specific model version to use. It can be a branch name, a tag name, or a commit id. + force_download (`bool`, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + local_files_only (`Optional[bool]`, defaults to `False`): + Whether or not to only look at local files (i.e., do not try to download the model). + use_auth_token (`Optional[str]`, defaults to `None`): + The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated + when running `transformers-cli login` (stored in `~/.huggingface`). + model_kwargs (`Optional[Dict[str, Any]]`, defaults to `None`): + Experimental usage: keyword arguments to pass to the model during + the export. This argument should be used along the `custom_onnx_configs` argument + in case, for example, the model inputs/outputs are changed (for example, if + `model_kwargs={"output_attentions": True}` is passed). + custom_onnx_configs (`Optional[Dict[str, OnnxConfig]]`, defaults to `None`): + Experimental usage: override the default ONNX config used for the given model. This argument may be useful for advanced users that desire a finer-grained control on the export. An example is available [here](https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model). + fn_get_submodels (`Optional[Callable]`, defaults to `None`): + Experimental usage: Override the default submodels that are used at the export. This is + especially useful when exporting a custom architecture that needs to split the ONNX (e.g. encoder-decoder). 
If unspecified with custom models, optimum will try to use the default submodels used for the given task, with no guarantee of success. + use_subprocess (`bool`): + Do the ONNX exported model validation in subprocesses. This is especially useful when + exporting on CUDA device, where ORT does not release memory at inference session + destruction. When set to `True`, the `main_export` call should be guarded in + `if __name__ == "__main__":` block. + **kwargs_shapes (`Dict`): + Shapes to use during inference. This argument allows to override the default shapes used during the ONNX export. + + Example usage: + ```python + >>> from optimum.exporters.onnx import main_export + + >>> main_export("gpt2", output="gpt2_onnx/") + ``` + """ + if optimize == "O4" and device != "cuda": + raise ValueError( + "Requested O4 optimization, but this optimization requires to do the export on GPU." + " Please pass the argument `--device cuda`." + ) + + if (framework == "tf" and fp16 is True) or not is_torch_available(): + raise ValueError("The --fp16 option is supported only for PyTorch.") + + if fp16 is True and device == "cpu": + raise ValueError( + "The --fp16 option is supported only when exporting on GPU. Please pass the option `--device cuda`." + ) + + output = Path(output) + if not output.exists(): + output.mkdir(parents=True) + + if for_ort: + logger.warning( + "The option --for-ort was passed, but its behavior is now the default in the ONNX exporter" + " and passing it is not required anymore." + ) + + original_task = task + task = TasksManager.map_from_synonym(task) + + framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework) + + # get the shapes to be used to generate dummy inputs + input_shapes = {} + for input_name in DEFAULT_DUMMY_SHAPES.keys(): + input_shapes[input_name] = ( + kwargs_shapes[input_name] if input_name in kwargs_shapes else DEFAULT_DUMMY_SHAPES[input_name] + ) + + torch_dtype = None if fp16 is False else torch.float16 + + if task == "auto": + try: + task = TasksManager.infer_task_from_model(model_name_or_path) + except KeyError as e: + raise KeyError( + f"The task could not be automatically inferred. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}" + ) + except RequestsConnectionError as e: + raise RequestsConnectionError( + f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}" + ) + + model = TasksManager.get_model_from_task( + task, + model_name_or_path, + subfolder=subfolder, + revision=revision, + cache_dir=cache_dir, + use_auth_token=use_auth_token, + local_files_only=local_files_only, + force_download=force_download, + trust_remote_code=trust_remote_code, + framework=framework, + torch_dtype=torch_dtype, + device=device, + ) + + custom_architecture = False + is_stable_diffusion = "stable-diffusion" in task + model_type = "stable-diffusion" if is_stable_diffusion else model.config.model_type.replace("_", "-") + + if not is_stable_diffusion: + if model_type in TasksManager._UNSUPPORTED_CLI_MODEL_TYPE: + raise ValueError( + f"{model_type} is not supported yet. Only {TasksManager._SUPPORTED_CLI_MODEL_TYPE} are supported. " + f"If you want to support {model_type} please propose a PR or open up an issue." 
+ ) + if model.config.model_type.replace("-", "_") not in TasksManager.get_supported_model_type_for_task( + task, exporter="onnx" + ): + custom_architecture = True + + # TODO: support onnx_config.py in the model repo + if custom_architecture and custom_onnx_configs is None: + raise ValueError( + "Trying to export a model with a custom architecture, but no custom onnx configuration was passed as `custom_onnx_configs`. Please refer to https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#custom-export-of-transformers-models for an example on how to export custom models." + ) + + if custom_architecture and original_task == "auto": + raise ValueError( + f'Automatic task detection is not supported with custom architectures. Please specify the `task` argument. Suggestion: task="{task}" (or task="{task}-with-past" if the model is decoder-based and supports KV cache)' + ) + + if ( + not custom_architecture + and not is_stable_diffusion + and task + "-with-past" in TasksManager.get_supported_tasks_for_model_type(model_type, "onnx") + ): + if original_task == "auto": # Make -with-past the default if --task was not explicitely specified + task = task + "-with-past" + else: + print( + f"The task `{task}` was manually specified, and past key values will not be reused in the decoding." + f" if needed, please pass `--task {task}-with-past` to export using the past key values." + ) + + if task.endswith("-with-past") and monolith is True: + task_non_past = task.replace("-with-past", "") + raise ValueError( + f"The task {task} is not compatible with the --monolith argument. Please either use" + f" `--task {task_non_past} --monolith`, or `--task {task}` without the monolith argument." + ) + + if original_task == "auto": + synonyms_for_task = sorted(TasksManager.synonyms_for_task(task)) + if synonyms_for_task: + synonyms_for_task = ", ".join(synonyms_for_task) + possible_synonyms = f" (possible synonyms are: {synonyms_for_task})" + else: + possible_synonyms = "" + print(f"Automatic task detection to {task}{possible_synonyms}.") + + onnx_config, models_and_onnx_configs = __main__._get_submodels_and_onnx_configs( + model=model, + task=task, + monolith=monolith, + custom_onnx_configs=custom_onnx_configs if custom_onnx_configs is not None else {}, + custom_architecture=custom_architecture, + fn_get_submodels=fn_get_submodels, + ) + + if not is_stable_diffusion: + needs_pad_token_id = ( + isinstance(onnx_config, OnnxConfigWithPast) + and getattr(model.config, "pad_token_id", None) is None + and task in ["text-classification"] + ) + if needs_pad_token_id: + if pad_token_id is not None: + model.config.pad_token_id = pad_token_id + else: + try: + tok = AutoTokenizer.from_pretrained(model_name_or_path) + model.config.pad_token_id = tok.pad_token_id + except Exception: + raise ValueError( + "Could not infer the pad token id, which is needed in this case, please provide it with the --pad_token_id argument" + ) + # Saving the model config and preprocessor as this is needed sometimes. + model.config.save_pretrained(output) + generation_config = getattr(model, "generation_config", None) + if generation_config is not None: + generation_config.save_pretrained(output) + maybe_save_preprocessors(model_name_or_path, output) + + if model.config.is_encoder_decoder and task.startswith("text-generation"): + raise ValueError( + f"model.config.is_encoder_decoder is True and task is `{task}`, which are incompatible. 
If the task was auto-inferred, please fill a bug report" + f"at https://github.com/huggingface/optimum, if --task was explicitely passed, make sure you selected the right task for the model," + f" referring to `optimum.exporters.tasks.TaskManager`'s `_TASKS_TO_AUTOMODELS`." + ) + + files_subpaths = None + else: + # save the subcomponent configuration + for model_name in models_and_onnx_configs: + subcomponent = models_and_onnx_configs[model_name][0] + if hasattr(subcomponent, "save_config"): + subcomponent.save_config(output / model_name) + elif hasattr(subcomponent, "config") and hasattr(subcomponent.config, "save_pretrained"): + subcomponent.config.save_pretrained(output / model_name) + + files_subpaths = [os.path.join(name_dir, OV_XML_FILE_NAME) for name_dir in models_and_onnx_configs] + + # Saving the additional components needed to perform inference. + model.scheduler.save_pretrained(output.joinpath("scheduler")) + + feature_extractor = getattr(model, "feature_extractor", None) + if feature_extractor is not None: + feature_extractor.save_pretrained(output.joinpath("feature_extractor")) + + tokenizer = getattr(model, "tokenizer", None) + if tokenizer is not None: + tokenizer.save_pretrained(output.joinpath("tokenizer")) + + tokenizer_2 = getattr(model, "tokenizer_2", None) + if tokenizer_2 is not None: + tokenizer_2.save_pretrained(output.joinpath("tokenizer_2")) + + model.save_config(output) + + export_models( + models_and_onnx_configs=models_and_onnx_configs, + output_dir=output, + output_names=files_subpaths, + input_shapes=input_shapes, + device=device, + model_kwargs=model_kwargs, + ) \ No newline at end of file diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index cf2100bac3..bcdc077a3d 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -29,14 +29,13 @@ from transformers.modeling_outputs import CausalLMOutputWithPast from optimum.exporters import TasksManager -from optimum.exporters.onnx import export from optimum.utils import NormalizedConfigManager -#from optimum.exporters.onnx import export from ..utils.import_utils import is_transformers_version from ..utils.modeling_utils import _prepare_attn_mask, _prepare_decoder_attention_mask from .modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, STR_TO_OV_TYPE +from .export import export, is_torch_model if is_transformers_version("<", "4.25.0"): diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index 4c54e69614..c0d0870db3 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -36,7 +36,7 @@ from openvino.runtime import Core from transformers import CLIPFeatureExtractor, CLIPTokenizer -from optimum.exporters.onnx import main_export +from .export import main_export from optimum.pipelines.diffusers.pipeline_stable_diffusion import StableDiffusionPipelineMixin from optimum.pipelines.diffusers.pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipelineMixin from optimum.pipelines.diffusers.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipelineMixin From 323776375d1d01b443bcecc8fffe141322f6e9d5 Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 2 Aug 2023 13:57:17 +0400 Subject: [PATCH 04/38] cleanup --- optimum/intel/openvino/export.py | 181 ++++----------------- 
optimum/intel/openvino/modeling_base.py | 7 +- optimum/intel/openvino/modeling_decoder.py | 4 - optimum/intel/openvino/modeling_seq2seq.py | 2 - optimum/intel/openvino/quantization.py | 49 +++--- optimum/intel/openvino/trainer.py | 2 +- setup.py | 4 +- 7 files changed, 65 insertions(+), 184 deletions(-) diff --git a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py index bf5cc7b933..ba12f53e26 100644 --- a/optimum/intel/openvino/export.py +++ b/optimum/intel/openvino/export.py @@ -1,9 +1,9 @@ +import logging import inspect import os from pathlib import Path from typing import Dict, List, Optional, Tuple, Union, Any, Callable import functools -import time from transformers.utils import is_tf_available, is_torch_available from transformers import AutoTokenizer @@ -16,11 +16,13 @@ from optimum.utils.save_utils import maybe_save_preprocessors from optimum.exporters.onnx import __main__ -from openvino.tools.mo import convert_model +from openvino.tools.mo import convert_model from openvino.runtime import serialize, PartialShape from openvino.runtime.utils.types import get_element_type from .utils import OV_XML_FILE_NAME +logger = logging.getLogger(__name__) + if is_torch_available(): import torch.nn as nn from transformers.modeling_utils import PreTrainedModel @@ -31,11 +33,13 @@ if is_tf_available(): from transformers.modeling_tf_utils import TFPreTrainedModel + def is_torch_model(model): if not is_torch_available(): return False return isinstance(model, nn.Module) + def export( model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], config: OnnxConfig, @@ -77,7 +81,9 @@ def export( raise ImportError("The pip package `diffusers` is required to export stable diffusion models to ONNX.") if is_torch_available() and isinstance(model, nn.Module): - return export_pytorch(model, config, opset, output, device=device, input_shapes=input_shapes, model_kwargs=model_kwargs) + return export_pytorch( + model, config, opset, output, device=device, input_shapes=input_shapes, model_kwargs=model_kwargs + ) elif is_tf_available() and issubclass(type(model), TFPreTrainedModel): output.parent.mkdir(parents=True, exist_ok=True) @@ -86,7 +92,7 @@ def export( if device == "cuda": raise RuntimeError("`tf2onnx` does not support export on CUDA device.") if input_shapes is not None: - print("`input_shapes` argument is not supported by the Tensorflow ONNX export and will be ignored.") + logger.info("`input_shapes` argument is not supported by the Tensorflow ONNX export and will be ignored.") return export_tensorflow(model, config, opset, output) else: @@ -127,10 +133,10 @@ def export_pytorch( the ONNX configuration. 
""" import torch - from torch.onnx import export as onnx_export from torch.utils._pytree import tree_map - print(f"Using framework PyTorch: {torch.__version__}") + logger.info(f"Using framework PyTorch: {torch.__version__}") + output = Path(output) with torch.no_grad(): model.config.return_dict = True @@ -140,9 +146,9 @@ def export_pytorch( # Check if we need to override certain configuration item if config.values_override is not None: - print(f"Overriding {len(config.values_override)} configuration item(s)") + logger.info(f"Overriding {len(config.values_override)} configuration item(s)") for override_config_key, override_config_value in config.values_override.items(): - print(f"\t- {override_config_key} -> {override_config_value}") + logger.info(f"\t- {override_config_key} -> {override_config_value}") setattr(model.config, override_config_key, override_config_value) if input_shapes is None: @@ -167,36 +173,39 @@ def export_pytorch( dummy_inputs = remove_none_from_dummy_inputs(dummy_inputs) input_info = get_input_shapes(dummy_inputs, inputs) - start0 = time.perf_counter() try: if custom_patcher: patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs) patched_forward = patcher.patched_forward + @functools.wraps(patched_forward) def ts_patched_forward(*args, **kwargs): outputs = patched_forward(*args, **kwargs) return tuple(outputs.values()) + patcher.patched_forward = ts_patched_forward with patcher: ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) else: ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) except Exception: + model.config.torchscript = False + model.config.return_dict = True onnx_output = output.with_suffix(".onnx") - input_names, output_names = export_pytorch_to_onnx(model, config, opset, onnx_output, device, input_shapes, model_kwargs) + input_names, output_names = export_pytorch_to_onnx( + model, config, opset, onnx_output, device, input_shapes, model_kwargs + ) ov_model = convert_model(onnx_output) serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) - return input_names, output_names + return input_names, output_names, True - end0 = time.perf_counter() - print(f"Convert model took {end0 - start0}s") ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} ordered_input_names = list(inputs) flatten_inputs = flattenize_inputs(ordered_dummy_inputs.values()) for idx, out_tensor in enumerate(ov_model.outputs): if idx < len(output_names): out_tensor.get_tensor().set_names({output_names[idx]}) - + for idx, inp_tensor in enumerate(ov_model.inputs): input_name = ordered_input_names[idx] inp_tensor.get_tensor().set_names({input_name}) @@ -205,15 +214,12 @@ def ts_patched_forward(*args, **kwargs): dims = inputs[input_name] for dim in dims: - static_shape[dim] = -1 + static_shape[dim] = -1 inp_tensor.get_node().set_partial_shape(static_shape) inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) ov_model.validate_nodes_and_infer_types() - start1 = time.perf_counter() serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) - end1 = time.perf_counter() - print(f"Serailize model took {end1 - start1}s") - return input_names, output_names + return input_names, output_names, False def export_models( @@ -227,30 +233,6 @@ def export_models( input_shapes: Optional[Dict] = None, model_kwargs: Optional[Dict[str, Any]] = None, ) -> Tuple[List[List[str]], 
List[List[str]]]: - """ - Exports a Pytorch or TensorFlow encoder decoder model to an ONNX Intermediate Representation. - The following method exports the encoder and decoder components of the model as separate - ONNX files. - - Args: - models_and_onnx_configs (`Dict[str, Tuple[Union[`PreTrainedModel`, `TFPreTrainedModel`], `OnnxConfig`]]): - A dictionnary containing the models to export and their corresponding onnx configs. - output_dir (`Path`): - Output directory to store the exported ONNX models. - opset (`Optional[int]`, defaults to `None`): - The version of the ONNX operator set to use. - output_names (`Optional[List[str]]`, defaults to `None`): - The names to use for the exported ONNX files. The order must be the same as the order of submodels in the ordered dict `models_and_onnx_configs`. - If None, will use the keys from `models_and_onnx_configs` as names. - device (`str`, defaults to `"cpu"`): - The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for - export on CUDA devices. - input_shapes (`Optional[Dict]`, defaults to `None`): - If specified, allows to use specific shapes for the example input provided to the ONNX exporter. - Returns: - `Tuple[List[List[str]], List[List[str]]]`: A tuple with an ordered list of the model's inputs, and the named - inputs from the ONNX configuration. - """ outputs = [] if output_names is not None and len(output_names) != len(models_and_onnx_configs): @@ -296,7 +278,7 @@ def remove_none_from_list_tuple(item): new_item = [i for i in item if i is not None] return type(item)(new_item) - upd_dummy = {} + upd_dummy = {} for k, v in dummy_inputs.items(): if v is None: continue @@ -310,6 +292,7 @@ def remove_none_from_list_tuple(item): upd_dummy[k] = v return upd_dummy + def get_input_shapes(dummy_inputs, inputs): input_info = [] for input_name, data in dummy_inputs.items(): @@ -330,7 +313,6 @@ def main_export( task: str = "auto", device: str = "cpu", fp16: Optional[bool] = False, - optimize: Optional[str] = None, monolith: bool = False, framework: Optional[str] = None, cache_dir: Optional[str] = None, @@ -341,118 +323,15 @@ def main_export( force_download: bool = False, local_files_only: bool = False, use_auth_token: Optional[Union[bool, str]] = None, - for_ort: bool = False, model_kwargs: Optional[Dict[str, Any]] = None, custom_onnx_configs: Optional[Dict[str, "OnnxConfig"]] = None, fn_get_submodels: Optional[Callable] = None, **kwargs_shapes, ): - """ - Full-suite ONNX export. - - Args: - > Required parameters - - model_name_or_path (`str`): - Model ID on huggingface.co or path on disk to the model repository to export. - output (`Union[str, Path]`): - Path indicating the directory where to store the generated ONNX model. - - > Optional parameters - - task (`Optional[str]`, defaults to `None`): - The task to export the model for. If not specified, the task will be auto-inferred based on the model. For decoder models, - use `xxx-with-past` to export the model using past key values in the decoder. - opset (`Optional[int]`, defaults to `None`): - If specified, ONNX opset version to export the model with. Otherwise, the default opset for the given model architecture - will be used. - device (`str`, defaults to `"cpu"`): - The device to use to do the export. Defaults to "cpu". - fp16 (`Optional[bool]`, defaults to `"False"`): - Use half precision during the export. PyTorch-only, requires `device="cuda"`. 
- optimize (`Optional[str]`, defaults to `None`): - Allows to run ONNX Runtime optimizations directly during the export. Some of these optimizations are specific to - ONNX Runtime, and the resulting ONNX will not be usable with other runtime as OpenVINO or TensorRT. - Available options: `"O1", "O2", "O3", "O4"`. Reference: [`~optimum.onnxruntime.AutoOptimizationConfig`] - monolith (`bool`, defaults to `False`): - Forces to export the model as a single ONNX file. - no_post_process (`bool`, defaults to `False`): - Allows to disable any post-processing done by default on the exported ONNX models. - framework (`Optional[str]`, defaults to `None`): - The framework to use for the ONNX export (`"pt"` or `"tf"`). If not provided, will attempt to automatically detect - the framework for the checkpoint. - atol (`Optional[float]`, defaults to `None`): - If specified, the absolute difference tolerance when validating the model. Otherwise, the default atol for the model will be used. - cache_dir (`Optional[str]`, defaults to `None`): - Path indicating where to store cache. The default Hugging Face cache path will be used by default. - trust_remote_code (`bool`, defaults to `False`): - Allows to use custom code for the modeling hosted in the model repository. This option should only be set for repositories - you trust and in which you have read the code, as it will execute on your local machine arbitrary code present in the - model repository. - pad_token_id (`Optional[int]`, defaults to `None`): - This is needed by some models, for some tasks. If not provided, will attempt to use the tokenizer to guess it. - subfolder (`str`, defaults to `""`): - In case the relevant files are located inside a subfolder of the model repo either locally or on huggingface.co, you can - specify the folder name here. - revision (`str`, defaults to `"main"`): - Revision is the specific model version to use. It can be a branch name, a tag name, or a commit id. - force_download (`bool`, defaults to `False`): - Whether or not to force the (re-)download of the model weights and configuration files, overriding the - cached versions if they exist. - local_files_only (`Optional[bool]`, defaults to `False`): - Whether or not to only look at local files (i.e., do not try to download the model). - use_auth_token (`Optional[str]`, defaults to `None`): - The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated - when running `transformers-cli login` (stored in `~/.huggingface`). - model_kwargs (`Optional[Dict[str, Any]]`, defaults to `None`): - Experimental usage: keyword arguments to pass to the model during - the export. This argument should be used along the `custom_onnx_configs` argument - in case, for example, the model inputs/outputs are changed (for example, if - `model_kwargs={"output_attentions": True}` is passed). - custom_onnx_configs (`Optional[Dict[str, OnnxConfig]]`, defaults to `None`): - Experimental usage: override the default ONNX config used for the given model. This argument may be useful for advanced users that desire a finer-grained control on the export. An example is available [here](https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model). - fn_get_submodels (`Optional[Callable]`, defaults to `None`): - Experimental usage: Override the default submodels that are used at the export. This is - especially useful when exporting a custom architecture that needs to split the ONNX (e.g. encoder-decoder). 
If unspecified with custom models, optimum will try to use the default submodels used for the given task, with no guarantee of success. - use_subprocess (`bool`): - Do the ONNX exported model validation in subprocesses. This is especially useful when - exporting on CUDA device, where ORT does not release memory at inference session - destruction. When set to `True`, the `main_export` call should be guarded in - `if __name__ == "__main__":` block. - **kwargs_shapes (`Dict`): - Shapes to use during inference. This argument allows to override the default shapes used during the ONNX export. - - Example usage: - ```python - >>> from optimum.exporters.onnx import main_export - - >>> main_export("gpt2", output="gpt2_onnx/") - ``` - """ - if optimize == "O4" and device != "cuda": - raise ValueError( - "Requested O4 optimization, but this optimization requires to do the export on GPU." - " Please pass the argument `--device cuda`." - ) - - if (framework == "tf" and fp16 is True) or not is_torch_available(): - raise ValueError("The --fp16 option is supported only for PyTorch.") - - if fp16 is True and device == "cpu": - raise ValueError( - "The --fp16 option is supported only when exporting on GPU. Please pass the option `--device cuda`." - ) - output = Path(output) if not output.exists(): output.mkdir(parents=True) - if for_ort: - logger.warning( - "The option --for-ort was passed, but its behavior is now the default in the ONNX exporter" - " and passing it is not required anymore." - ) - original_task = task task = TasksManager.map_from_synonym(task) @@ -528,7 +407,7 @@ def main_export( if original_task == "auto": # Make -with-past the default if --task was not explicitely specified task = task + "-with-past" else: - print( + logger.info( f"The task `{task}` was manually specified, and past key values will not be reused in the decoding." f" if needed, please pass `--task {task}-with-past` to export using the past key values." 
) @@ -547,7 +426,7 @@ def main_export( possible_synonyms = f" (possible synonyms are: {synonyms_for_task})" else: possible_synonyms = "" - print(f"Automatic task detection to {task}{possible_synonyms}.") + logger.info(f"Automatic task detection to {task}{possible_synonyms}.") onnx_config, models_and_onnx_configs = __main__._get_submodels_and_onnx_configs( model=model, @@ -625,4 +504,4 @@ def main_export( input_shapes=input_shapes, device=device, model_kwargs=model_kwargs, - ) \ No newline at end of file + ) diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index 71e74b154e..cf8e94c0c7 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -133,9 +133,12 @@ def fix_op_names_duplicates(model: openvino.runtime.Model): file_name = Path(file_name) bin_file_name = file_name.with_suffix(".bin") if file_name.suffix == ".xml" else None s = time.perf_counter() - model = core.read_model(file_name, bin_file_name) if not file_name.suffix == ".onnx" else mo.convert_model(file_name) + model = ( + core.read_model(file_name, bin_file_name) + if not file_name.suffix == ".onnx" + else mo.convert_model(file_name) + ) e = time.perf_counter() - print(f"Read model took {e - s}s") if file_name.suffix == ".onnx": model = fix_op_names_duplicates(model) # should be called during model conversion to IR diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index bcdc077a3d..cf81437b01 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -17,7 +17,6 @@ from pathlib import Path from tempfile import TemporaryDirectory from typing import Dict, Optional, Tuple, Union -import time import numpy as np import openvino @@ -226,10 +225,7 @@ def _from_transformers( "force_download": force_download, "trust_remote_code": trust_remote_code, } - start0 = time.perf_counter() model = TasksManager.get_model_from_task(task, model_id, **model_kwargs) - end0 = time.perf_counter() - print(f"Reading PT model took {end0 - start0}") config.is_decoder = True config.is_encoder_decoder = False onnx_config_constructor = TasksManager.get_exporter_config_constructor(model=model, exporter="onnx", task=task) diff --git a/optimum/intel/openvino/modeling_seq2seq.py b/optimum/intel/openvino/modeling_seq2seq.py index 9994b70d64..4d5f4e2934 100644 --- a/optimum/intel/openvino/modeling_seq2seq.py +++ b/optimum/intel/openvino/modeling_seq2seq.py @@ -412,8 +412,6 @@ def forward( # Add the encoder_hidden_states inputs when needed if "encoder_hidden_states" in self.input_names and encoder_hidden_states is not None: inputs["encoder_hidden_states"] = encoder_hidden_states - - print(self.model) # Run inference self.request.start_async(inputs, shared_memory=True) self.request.wait() diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index 99e22e72f5..6c7db722e2 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -33,7 +33,8 @@ from torch.utils.data import DataLoader, RandomSampler, TensorDataset from transformers import DataCollator, PreTrainedModel, default_data_collator -from optimum.exporters.onnx import export +from optimum.exporters.onnx import export as onnx_export +from .export import export from optimum.exporters.tasks import TasksManager from optimum.quantization_base import OptimumQuantizer @@ -384,28 +385,32 @@ def data_collator(batch): else: onnx_config = 
onnx_config_class(model.config) - onnx_path = save_directory / ONNX_WEIGHTS_NAME - - # Export the model to the ONNX format - opset = min(onnx_config.DEFAULT_ONNX_OPSET, MAX_ONNX_OPSET) - opset = max(opset, MIN_ONNX_QDQ_OPSET) - export( - model=compressed_model, - config=onnx_config, - opset=opset, - output=onnx_path, - ) + model_path = save_directory / (ONNX_WEIGHTS_NAME if quantization_config.save_onnx_model else OV_XML_FILE_NAME) + if quantization_config.save_onnx_model: + # Export the model to the ONNX format + opset = min(onnx_config.DEFAULT_ONNX_OPSET, MAX_ONNX_OPSET) + opset = max(opset, MIN_ONNX_QDQ_OPSET) + onnx_export( + model=compressed_model, + config=onnx_config, + opset=opset, + output=model_path, + ) - # Load and save the compressed model - model = core.read_model(onnx_path) - self._save_pretrained(model, output_path) - quantization_config.save_pretrained(save_directory) - if not quantization_config.save_onnx_model: - os.remove(onnx_path) - try: - os.remove(f"{onnx_path}_data") - except FileNotFoundError: - pass + # Load and save the compressed model + model = core.read_model(model_path) + self._save_pretrained(model, output_path) + else: + _, _, is_onnx = export(model=compressed_model, config=onnx_config, output=output_path) + if is_onnx: + onnx_path = output_path.replace(".xml", ".onnx") + model = core.read_model(onnx_path) + self._save_pretrained(model, output_path) + os.remove(onnx_path) + try: + os.remove(f"{onnx_path}_data") + except FileNotFoundError: + pass @staticmethod def _save_pretrained(model: openvino.runtime.Model, output_path: str): diff --git a/optimum/intel/openvino/trainer.py b/optimum/intel/openvino/trainer.py index 091d28047a..811309806a 100644 --- a/optimum/intel/openvino/trainer.py +++ b/optimum/intel/openvino/trainer.py @@ -41,7 +41,7 @@ from nncf.torch.quantization.algo import QuantizationController from openvino._offline_transformations import compress_quantize_weights_transformation from openvino.runtime import Core, PartialShape, serialize -from openvino.tools.ovc.moc_frontend.offline_transformations import ( +from openvino.tools.mo.back.offline_transformations import ( apply_fused_names_cleanup, apply_moc_transformations, apply_user_transformations, diff --git a/setup.py b/setup.py index c35640226d..6ddc9fdd6e 100644 --- a/setup.py +++ b/setup.py @@ -41,8 +41,8 @@ "onnx", "onnxruntime<1.15.0", ], - "openvino": ["openvino>=2023.0.0", "onnx", "onnxruntime"], - "nncf": ["nncf>=2.5.0", "openvino-dev>=2023.0.0"], + "openvino": ["openvino==2023.1.0.dev20230728", "onnx", "onnxruntime"], + "nncf": ["nncf>=2.5.0", "openvino-dev==2023.1.0.dev20230728"], "ipex": ["transformers<4.32.0", "intel-extension-for-pytorch", "onnx"], "diffusers": ["diffusers", "invisible-watermark>=0.2.0"], "quality": QUALITY_REQUIRE, From 58acfb59434887b8049f39d24667d49856db288f Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 2 Aug 2023 14:23:41 +0400 Subject: [PATCH 05/38] fix style --- optimum/intel/openvino/export.py | 23 +++++++++++-------- optimum/intel/openvino/modeling_base.py | 7 ++---- .../intel/openvino/modeling_base_seq2seq.py | 2 +- optimum/intel/openvino/modeling_decoder.py | 2 +- optimum/intel/openvino/modeling_diffusion.py | 3 +-- optimum/intel/openvino/quantization.py | 2 +- 6 files changed, 19 insertions(+), 20 deletions(-) diff --git a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py index ba12f53e26..143f725fc7 100644 --- a/optimum/intel/openvino/export.py +++ b/optimum/intel/openvino/export.py @@ -1,29 +1,32 @@ -import logging +import 
functools import inspect +import logging import os from pathlib import Path -from typing import Dict, List, Optional, Tuple, Union, Any, Callable -import functools +from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from transformers.utils import is_tf_available, is_torch_available +from openvino.runtime import PartialShape, serialize +from openvino.runtime.utils.types import get_element_type +from openvino.tools.mo import convert_model +from requests.exceptions import ConnectionError as RequestsConnectionError from transformers import AutoTokenizer +from transformers.utils import is_tf_available, is_torch_available -from optimum.utils import is_diffusers_available, DEFAULT_DUMMY_SHAPES from optimum.exporters import TasksManager +from optimum.exporters.onnx import __main__ from optimum.exporters.onnx.base import OnnxConfig, OnnxConfigWithPast -from optimum.exporters.onnx.convert import export_tensorflow, check_dummy_inputs_are_allowed +from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed, export_tensorflow from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx +from optimum.utils import DEFAULT_DUMMY_SHAPES, is_diffusers_available from optimum.utils.save_utils import maybe_save_preprocessors -from optimum.exporters.onnx import __main__ -from openvino.tools.mo import convert_model -from openvino.runtime import serialize, PartialShape -from openvino.runtime.utils.types import get_element_type from .utils import OV_XML_FILE_NAME + logger = logging.getLogger(__name__) if is_torch_available(): + import torch import torch.nn as nn from transformers.modeling_utils import PreTrainedModel diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index cf8e94c0c7..1388497b55 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -17,22 +17,21 @@ from pathlib import Path from tempfile import TemporaryDirectory from typing import Dict, Optional, Union -import time import openvino from huggingface_hub import hf_hub_download from openvino._offline_transformations import apply_moc_transformations, compress_model_transformation -from openvino.tools import mo from openvino.runtime import Core +from openvino.tools import mo from transformers import PretrainedConfig from transformers.file_utils import add_start_docstrings from optimum.exporters.onnx import OnnxConfig from optimum.exporters.tasks import TasksManager -from .export import export, is_torch_model from optimum.modeling_base import OptimizedModel from ..utils.import_utils import is_transformers_version +from .export import export, is_torch_model from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME @@ -132,13 +131,11 @@ def fix_op_names_duplicates(model: openvino.runtime.Model): if isinstance(file_name, str): file_name = Path(file_name) bin_file_name = file_name.with_suffix(".bin") if file_name.suffix == ".xml" else None - s = time.perf_counter() model = ( core.read_model(file_name, bin_file_name) if not file_name.suffix == ".onnx" else mo.convert_model(file_name) ) - e = time.perf_counter() if file_name.suffix == ".onnx": model = fix_op_names_duplicates(model) # should be called during model conversion to IR diff --git a/optimum/intel/openvino/modeling_base_seq2seq.py b/optimum/intel/openvino/modeling_base_seq2seq.py index af5e2388f8..5a5e195845 100644 --- a/optimum/intel/openvino/modeling_base_seq2seq.py +++ b/optimum/intel/openvino/modeling_base_seq2seq.py @@ -26,9 +26,9 @@ from 
optimum.exporters import TasksManager from optimum.exporters.onnx import get_encoder_decoder_models_for_export -from .export import export_models from ..utils.import_utils import is_transformers_version +from .export import export_models from .modeling_base import OVBaseModel from .utils import ( ONNX_DECODER_NAME, diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index cf81437b01..7cc2b34f2c 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -32,9 +32,9 @@ from ..utils.import_utils import is_transformers_version from ..utils.modeling_utils import _prepare_attn_mask, _prepare_decoder_attention_mask +from .export import export, is_torch_model from .modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, STR_TO_OV_TYPE -from .export import export, is_torch_model if is_transformers_version("<", "4.25.0"): diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index c0d0870db3..c807f61b3f 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -36,7 +36,6 @@ from openvino.runtime import Core from transformers import CLIPFeatureExtractor, CLIPTokenizer -from .export import main_export from optimum.pipelines.diffusers.pipeline_stable_diffusion import StableDiffusionPipelineMixin from optimum.pipelines.diffusers.pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipelineMixin from optimum.pipelines.diffusers.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipelineMixin @@ -51,7 +50,7 @@ ) from .loaders import OVTextualInversionLoaderMixin -from .export import export_models +from .export import main_export from .modeling_base import OVBaseModel from .utils import ONNX_WEIGHTS_NAME, OV_TO_NP_TYPE, OV_XML_FILE_NAME diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index 6c7db722e2..94b809bd39 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -34,12 +34,12 @@ from transformers import DataCollator, PreTrainedModel, default_data_collator from optimum.exporters.onnx import export as onnx_export -from .export import export from optimum.exporters.tasks import TasksManager from optimum.quantization_base import OptimumQuantizer from ..utils.constant import _TASK_ALIASES from .configuration import INT8_WEIGHT_COMPRESSION_CONFIG, OVConfig +from .export import export from .modeling_base import OVBaseModel from .modeling_decoder import OVBaseDecoderModel from .utils import ( From bf94ecc60eec265d1d33df4885b24cf18323d516 Mon Sep 17 00:00:00 2001 From: Ekaterina Aidova Date: Wed, 2 Aug 2023 14:14:43 +0400 Subject: [PATCH 06/38] revert changes not related to pr --- optimum/intel/openvino/modeling_decoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index 7cc2b34f2c..b5cbc2ba48 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -234,7 +234,7 @@ def _from_transformers( # TODO : create ModelPatcher to patch each architecture if config.model_type == "bloom": model.transformer._prepare_attn_mask = _prepare_attn_mask - elif config.model_type in {"llama", "longllama"}: + elif config.model_type == "llama": 
model.model._prepare_decoder_attention_mask = _prepare_decoder_attention_mask elif config.model_type in {"blenderbot-small", "blenderbot", "opt", "pegasus", "bart"}: model.model.decoder._prepare_decoder_attention_mask = _prepare_decoder_attention_mask From 744c2b643c0e882a617daec3bf6836b6c9976a71 Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 2 Aug 2023 16:19:35 +0400 Subject: [PATCH 07/38] clear ts registry: --- optimum/intel/openvino/export.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py index 143f725fc7..bb637a5353 100644 --- a/optimum/intel/openvino/export.py +++ b/optimum/intel/openvino/export.py @@ -1,4 +1,5 @@ import functools +import gc import inspect import logging import os @@ -201,7 +202,7 @@ def ts_patched_forward(*args, **kwargs): ov_model = convert_model(onnx_output) serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) return input_names, output_names, True - + clear_class_registry() ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} ordered_input_names = list(inputs) flatten_inputs = flattenize_inputs(ordered_dummy_inputs.values()) @@ -222,9 +223,17 @@ def ts_patched_forward(*args, **kwargs): inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) ov_model.validate_nodes_and_infer_types() serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) + del model + gc.collect() return input_names, output_names, False +def clear_class_registry(): + torch._C._jit_clear_class_registry() + torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() + torch.jit._state._clear_class_state() + + def export_models( models_and_onnx_configs: Dict[ str, Tuple[Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], "OnnxConfig"] From 1ca1edb6b6a9f1253d986911be5f41253ea7c720 Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 2 Aug 2023 18:00:44 +0400 Subject: [PATCH 08/38] remove ov dev from deps --- optimum/intel/openvino/export.py | 7 +++---- optimum/intel/openvino/modeling_base.py | 7 ++----- optimum/intel/openvino/quantization.py | 1 + optimum/intel/openvino/trainer.py | 12 ++++++------ setup.py | 2 +- 5 files changed, 13 insertions(+), 16 deletions(-) diff --git a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py index bb637a5353..5852195d63 100644 --- a/optimum/intel/openvino/export.py +++ b/optimum/intel/openvino/export.py @@ -6,9 +6,8 @@ from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from openvino.runtime import PartialShape, serialize +from openvino import PartialShape, convert_model, save_model from openvino.runtime.utils.types import get_element_type -from openvino.tools.mo import convert_model from requests.exceptions import ConnectionError as RequestsConnectionError from transformers import AutoTokenizer from transformers.utils import is_tf_available, is_torch_available @@ -200,7 +199,7 @@ def ts_patched_forward(*args, **kwargs): model, config, opset, onnx_output, device, input_shapes, model_kwargs ) ov_model = convert_model(onnx_output) - serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) + save_model(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) return input_names, output_names, True clear_class_registry() ordered_dummy_inputs = {param: 
dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} @@ -222,7 +221,7 @@ def ts_patched_forward(*args, **kwargs): inp_tensor.get_node().set_partial_shape(static_shape) inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) ov_model.validate_nodes_and_infer_types() - serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) + save_model(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) del model gc.collect() return input_names, output_names, False diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index 1388497b55..4c99bef09f 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -20,9 +20,8 @@ import openvino from huggingface_hub import hf_hub_download +from openvino import Core, convert_model from openvino._offline_transformations import apply_moc_transformations, compress_model_transformation -from openvino.runtime import Core -from openvino.tools import mo from transformers import PretrainedConfig from transformers.file_utils import add_start_docstrings @@ -132,9 +131,7 @@ def fix_op_names_duplicates(model: openvino.runtime.Model): file_name = Path(file_name) bin_file_name = file_name.with_suffix(".bin") if file_name.suffix == ".xml" else None model = ( - core.read_model(file_name, bin_file_name) - if not file_name.suffix == ".onnx" - else mo.convert_model(file_name) + core.read_model(file_name, bin_file_name) if not file_name.suffix == ".onnx" else convert_model(file_name) ) if file_name.suffix == ".onnx": model = fix_op_names_duplicates(model) # should be called during model conversion to IR diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index 94b809bd39..311ad211f9 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -411,6 +411,7 @@ def data_collator(batch): os.remove(f"{onnx_path}_data") except FileNotFoundError: pass + quantization_config.save_pretrained(save_directory) @staticmethod def _save_pretrained(model: openvino.runtime.Model, output_path: str): diff --git a/optimum/intel/openvino/trainer.py b/optimum/intel/openvino/trainer.py index 811309806a..22d402c80f 100644 --- a/optimum/intel/openvino/trainer.py +++ b/optimum/intel/openvino/trainer.py @@ -39,13 +39,13 @@ from nncf.torch.compression_method_api import PTCompressionAlgorithmController from nncf.torch.nncf_network import NNCFNetwork from nncf.torch.quantization.algo import QuantizationController -from openvino._offline_transformations import compress_quantize_weights_transformation -from openvino.runtime import Core, PartialShape, serialize -from openvino.tools.mo.back.offline_transformations import ( +from openvino._offline_transformations import ( apply_fused_names_cleanup, apply_moc_transformations, - apply_user_transformations, + apply_pruning_transformation, + compress_quantize_weights_transformation, ) +from openvino.runtime import Core, PartialShape, save_model from torch.onnx import export as onnx_export from torch.utils._pytree import tree_map from torch.utils.data import DataLoader, Dataset, RandomSampler @@ -755,7 +755,7 @@ def _save(self, output_dir: Optional[str] = None, state_dict=None): apply_moc_transformations(ov_model) if self._get_compression_controller_by_cls(QuantizationController) is not None: compress_quantize_weights_transformation(ov_model) - apply_user_transformations(ov_model, [("Pruning", {})]) 
+ apply_pruning_transformation(ov_model) apply_fused_names_cleanup(ov_model) # Reshape back to dynamic shape IR ov_model = self._reshape_ir(ov_model, static_shape=False) @@ -772,7 +772,7 @@ def _save(self, output_dir: Optional[str] = None, state_dict=None): compress_quantize_weights_transformation(ov_model) # Serialize IR xml and bin - serialize(ov_model, output_path) + save_model(ov_model, output_path) def _get_compression_controller_by_cls( self, controller_cls: Type[PTCompressionAlgorithmController] diff --git a/setup.py b/setup.py index 6ddc9fdd6e..7bdb9c062e 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ "onnxruntime<1.15.0", ], "openvino": ["openvino==2023.1.0.dev20230728", "onnx", "onnxruntime"], - "nncf": ["nncf>=2.5.0", "openvino-dev==2023.1.0.dev20230728"], + "nncf": ["nncf>=2.5.0"], "ipex": ["transformers<4.32.0", "intel-extension-for-pytorch", "onnx"], "diffusers": ["diffusers", "invisible-watermark>=0.2.0"], "quality": QUALITY_REQUIRE, From 3430ab024f433d541dbc1fce8a5286c721543910 Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 2 Aug 2023 18:14:35 +0400 Subject: [PATCH 09/38] update tests --- tests/openvino/test_modeling.py | 4 ++++ tests/openvino/test_quantization.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index b56b7766e7..2ffbbd6fba 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -257,6 +257,7 @@ def test_pipeline(self, model_arch): self.assertTrue(not model.is_dynamic) self.assertGreaterEqual(outputs[0]["score"], 0.0) self.assertIsInstance(outputs[0]["label"], str) + del model gc.collect() @@ -293,6 +294,8 @@ def test_compare_to_transformers(self, model_arch): self.assertTrue( torch.allclose(torch.Tensor(ov_outputs.end_logits), transformers_outputs.end_logits, atol=1e-4) ) + del ov_model + del transformers_model gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) @@ -307,6 +310,7 @@ def test_pipeline(self, model_arch): self.assertEqual(pipe.device, model.device) self.assertGreaterEqual(outputs["score"], 0.0) self.assertIsInstance(outputs["answer"], str) + del model gc.collect() def test_metric(self): diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index da9ba3b25a..4a2f96447c 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -64,7 +64,7 @@ def get_num_quantized_nodes(ov_model): class OVQuantizerTest(unittest.TestCase): # TODO : add models SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = ( - (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 42, 32), + (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 32, 32), (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 41, 21), ) @@ -146,7 +146,7 @@ def preprocess_function(examples, tokenizer): class OVWeightCompressionTest(unittest.TestCase): # TODO : add models SUPPORTED_ARCHITECTURES_WITH_EXPECTED_COMPRESSED_MATMULS = ( - (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 39), + (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 35), (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 5), ) From 661980b6db4f5a45d8618cdc257ae44786dbc40c Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 2 Aug 2023 18:46:46 +0400 Subject: [PATCH 10/38] return serialize back --- optimum/intel/openvino/export.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py index 5852195d63..375df18ac4 100644 --- a/optimum/intel/openvino/export.py +++ b/optimum/intel/openvino/export.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from openvino import PartialShape, convert_model, save_model +from openvino import PartialShape, convert_model, serialize from openvino.runtime.utils.types import get_element_type from requests.exceptions import ConnectionError as RequestsConnectionError from transformers import AutoTokenizer @@ -199,7 +199,7 @@ def ts_patched_forward(*args, **kwargs): model, config, opset, onnx_output, device, input_shapes, model_kwargs ) ov_model = convert_model(onnx_output) - save_model(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) + serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) return input_names, output_names, True clear_class_registry() ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} @@ -221,7 +221,7 @@ def ts_patched_forward(*args, **kwargs): inp_tensor.get_node().set_partial_shape(static_shape) inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) ov_model.validate_nodes_and_infer_types() - save_model(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) + serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) del model gc.collect() return input_names, output_names, False From ddd98e8e8bca8b9e5dde984d8622e6ca95fe8a04 Mon Sep 17 00:00:00 2001 From: eaidova Date: Thu, 13 Jul 2023 10:27:10 +0400 Subject: [PATCH 11/38] switch on pytorch frontend --- optimum/intel/openvino/export.py | 124 +++++++++++++++++++++ optimum/intel/openvino/modeling_base.py | 2 + optimum/intel/openvino/modeling_decoder.py | 3 +- 3 files changed, 128 insertions(+), 1 deletion(-) diff --git a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py index 375df18ac4..d736016cf9 100644 --- a/optimum/intel/openvino/export.py +++ b/optimum/intel/openvino/export.py @@ -29,6 +29,10 @@ import torch import torch.nn as nn from transformers.modeling_utils import PreTrainedModel +<<<<<<< HEAD +======= + from transformers.pytorch_utils import is_torch_less_than_1_11 +>>>>>>> switch on pytorch frontend if is_diffusers_available(): from diffusers import ModelMixin @@ -36,13 +40,19 @@ if is_tf_available(): from transformers.modeling_tf_utils import TFPreTrainedModel +<<<<<<< HEAD +======= +>>>>>>> switch on pytorch frontend def is_torch_model(model): if not is_torch_available(): return False return isinstance(model, nn.Module) +<<<<<<< HEAD +======= +>>>>>>> switch on pytorch frontend def export( model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], config: OnnxConfig, @@ -50,7 +60,10 @@ def export( opset: Optional[int] = None, device: str = "cpu", input_shapes: Optional[Dict] = None, +<<<<<<< HEAD model_kwargs: Optional[Dict[str, Any]] = None, +======= +>>>>>>> switch on pytorch frontend ) -> Tuple[List[str], List[str]]: """ Exports a Pytorch or TensorFlow model to an ONNX Intermediate Representation. 
@@ -84,9 +97,13 @@ def export( raise ImportError("The pip package `diffusers` is required to export stable diffusion models to ONNX.") if is_torch_available() and isinstance(model, nn.Module): +<<<<<<< HEAD return export_pytorch( model, config, opset, output, device=device, input_shapes=input_shapes, model_kwargs=model_kwargs ) +======= + return export_pytorch(model, config, opset, output, device=device, input_shapes=input_shapes) +>>>>>>> switch on pytorch frontend elif is_tf_available() and issubclass(type(model), TFPreTrainedModel): output.parent.mkdir(parents=True, exist_ok=True) @@ -95,7 +112,11 @@ def export( if device == "cuda": raise RuntimeError("`tf2onnx` does not support export on CUDA device.") if input_shapes is not None: +<<<<<<< HEAD logger.info("`input_shapes` argument is not supported by the Tensorflow ONNX export and will be ignored.") +======= + print("`input_shapes` argument is not supported by the Tensorflow ONNX export and will be ignored.") +>>>>>>> switch on pytorch frontend return export_tensorflow(model, config, opset, output) else: @@ -111,7 +132,10 @@ def export_pytorch( output: Path, device: str = "cpu", input_shapes: Optional[Dict] = None, +<<<<<<< HEAD model_kwargs: Optional[Dict[str, Any]] = None, +======= +>>>>>>> switch on pytorch frontend ) -> Tuple[List[str], List[str]]: """ Exports a PyTorch model to an ONNX Intermediate Representation. @@ -136,6 +160,7 @@ def export_pytorch( the ONNX configuration. """ import torch +<<<<<<< HEAD from torch.utils._pytree import tree_map logger.info(f"Using framework PyTorch: {torch.__version__}") @@ -145,13 +170,29 @@ def export_pytorch( model.config.return_dict = True custom_patcher = type(config).patch_model_for_export != OnnxConfig.patch_model_for_export model.config.torchscript = not custom_patcher +======= + from torch.onnx import export as onnx_export + from torch.utils._pytree import tree_map + + print(f"Using framework PyTorch: {torch.__version__}") + + with torch.no_grad(): + model.config.return_dict = True + model.config.torchscript = True +>>>>>>> switch on pytorch frontend model.eval() # Check if we need to override certain configuration item if config.values_override is not None: +<<<<<<< HEAD logger.info(f"Overriding {len(config.values_override)} configuration item(s)") for override_config_key, override_config_value in config.values_override.items(): logger.info(f"\t- {override_config_key} -> {override_config_value}") +======= + print(f"Overriding {len(config.values_override)} configuration item(s)") + for override_config_key, override_config_value in config.values_override.items(): + print(f"\t- {override_config_key} -> {override_config_value}") +>>>>>>> switch on pytorch frontend setattr(model.config, override_config_key, override_config_value) if input_shapes is None: @@ -169,11 +210,19 @@ def export_pytorch( inputs = config.ordered_inputs(model) input_names = list(inputs.keys()) output_names = list(config.outputs.keys()) +<<<<<<< HEAD +======= + + if hasattr(config, "patch_ops"): + config.patch_ops() + +>>>>>>> switch on pytorch frontend if hasattr(model, "forward"): sig = inspect.signature(model.forward) else: sig = inspect.signature(model.call) +<<<<<<< HEAD dummy_inputs = remove_none_from_dummy_inputs(dummy_inputs) input_info = get_input_shapes(dummy_inputs, inputs) try: @@ -209,6 +258,17 @@ def ts_patched_forward(*args, **kwargs): if idx < len(output_names): out_tensor.get_tensor().set_names({output_names[idx]}) +======= + input_info = get_input_shapes(dummy_inputs, inputs) + start0 = 
time.perf_counter() + ov_model = mo.convert_model(model, example_input=dummy_inputs, input=input_info) + end0 = time.perf_counter() + print(f"Convert model took {end0 - start0}s") + ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} + ordered_input_names = list(inputs) + flatten_inputs = flattenize_inputs(ordered_dummy_inputs.values()) + +>>>>>>> switch on pytorch frontend for idx, inp_tensor in enumerate(ov_model.inputs): input_name = ordered_input_names[idx] inp_tensor.get_tensor().set_names({input_name}) @@ -217,6 +277,7 @@ def ts_patched_forward(*args, **kwargs): dims = inputs[input_name] for dim in dims: +<<<<<<< HEAD static_shape[dim] = -1 inp_tensor.get_node().set_partial_shape(static_shape) inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) @@ -231,6 +292,24 @@ def clear_class_registry(): torch._C._jit_clear_class_registry() torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() torch.jit._state._clear_class_state() +======= + static_shape[dim] = -1 + inp_tensor.get_node().set_partial_shape(static_shape) + inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) + + for idx, out_tensor in enumerate(ov_model.outputs): + if idx < len(output_names): + out_tensor.get_tensor().set_names({output_names[idx]}) + ov_model.validate_nodes_and_infer_types() + start1 = time.perf_counter() + serialize(ov_model, output.parent / OV_XML_FILE_NAME) + end1 = time.perf_counter() + print(f"Serailize model took {end1 - start1}s") + if hasattr(config, "restore_ops"): + config.restore_ops() + + return input_names, output_names +>>>>>>> switch on pytorch frontend def export_models( @@ -242,8 +321,36 @@ def export_models( output_names: Optional[List[str]] = None, device: str = "cpu", input_shapes: Optional[Dict] = None, +<<<<<<< HEAD model_kwargs: Optional[Dict[str, Any]] = None, ) -> Tuple[List[List[str]], List[List[str]]]: +======= +) -> Tuple[List[List[str]], List[List[str]]]: + """ + Exports a Pytorch or TensorFlow encoder decoder model to an ONNX Intermediate Representation. + The following method exports the encoder and decoder components of the model as separate + ONNX files. + + Args: + models_and_onnx_configs (`Dict[str, Tuple[Union[`PreTrainedModel`, `TFPreTrainedModel`], `OnnxConfig`]]): + A dictionnary containing the models to export and their corresponding onnx configs. + output_dir (`Path`): + Output directory to store the exported ONNX models. + opset (`Optional[int]`, defaults to `None`): + The version of the ONNX operator set to use. + output_names (`Optional[List[str]]`, defaults to `None`): + The names to use for the exported ONNX files. The order must be the same as the order of submodels in the ordered dict `models_and_onnx_configs`. + If None, will use the keys from `models_and_onnx_configs` as names. + device (`str`, defaults to `"cpu"`): + The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for + export on CUDA devices. + input_shapes (`Optional[Dict]`, defaults to `None`): + If specified, allows to use specific shapes for the example input provided to the ONNX exporter. + Returns: + `Tuple[List[List[str]], List[List[str]]]`: A tuple with an ordered list of the model's inputs, and the named + inputs from the ONNX configuration. 
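The conflicted hunk above is the heart of this patch series: the PyTorch module is handed straight to OpenVINO's `convert_model` with an `example_input`, instead of being routed through an ONNX file first. A stripped-down sketch of that direct path, using a toy module and an arbitrary output name rather than the real transformers model:

    import torch
    from openvino.tools.mo import convert_model
    from openvino.runtime import serialize

    class TinyModel(torch.nn.Module):
        # Toy module standing in for the transformers model being exported.
        def forward(self, input_ids):
            return input_ids.float() * 2

    model = TinyModel().eval()
    example = torch.zeros(1, 16, dtype=torch.long)

    # The PyTorch frontend traces the module with the example input; no ONNX file is produced.
    ov_model = convert_model(model, example_input=example)
    serialize(ov_model, "tiny_model.xml")  # writes tiny_model.xml + tiny_model.bin
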
+ """ +>>>>>>> switch on pytorch frontend outputs = [] if output_names is not None and len(output_names) != len(models_and_onnx_configs): @@ -264,7 +371,10 @@ def export_models( opset=opset, device=device, input_shapes=input_shapes, +<<<<<<< HEAD model_kwargs=model_kwargs, +======= +>>>>>>> switch on pytorch frontend ) ) @@ -275,8 +385,11 @@ def export_models( def flattenize_inputs(inputs): flatten_inputs = [] for input_data in inputs: +<<<<<<< HEAD if input_data is None: continue +======= +>>>>>>> switch on pytorch frontend if isinstance(input_data, (list, tuple)): flatten_inputs.extend(flattenize_inputs(input_data)) else: @@ -284,6 +397,7 @@ def flattenize_inputs(inputs): return flatten_inputs +<<<<<<< HEAD def remove_none_from_dummy_inputs(dummy_inputs): def remove_none_from_list_tuple(item): new_item = [i for i in item if i is not None] @@ -308,6 +422,12 @@ def get_input_shapes(dummy_inputs, inputs): input_info = [] for input_name, data in dummy_inputs.items(): if isinstance(data, (tuple, list, dict)): +======= +def get_input_shapes(dummy_inputs, inputs): + input_info = [] + for input_name, data in dummy_inputs.items(): + if isinstance(data, (tuple, list)): +>>>>>>> switch on pytorch frontend return None static_shape = PartialShape(data.shape) if input_name in inputs: @@ -315,6 +435,7 @@ def get_input_shapes(dummy_inputs, inputs): for dim in dynamic_dims: static_shape[dim] = -1 input_info.append((input_name, static_shape)) +<<<<<<< HEAD return input_info @@ -516,3 +637,6 @@ def main_export( device=device, model_kwargs=model_kwargs, ) +======= + return input_info +>>>>>>> switch on pytorch frontend diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index 4c99bef09f..b5d3ae438b 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -22,6 +22,8 @@ from huggingface_hub import hf_hub_download from openvino import Core, convert_model from openvino._offline_transformations import apply_moc_transformations, compress_model_transformation +from openvino.tools import mo +from openvino.runtime import Core from transformers import PretrainedConfig from transformers.file_utils import add_start_docstrings diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index b5cbc2ba48..c8a7210642 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -28,6 +28,7 @@ from transformers.modeling_outputs import CausalLMOutputWithPast from optimum.exporters import TasksManager +from optimum.exporters.onnx import export from optimum.utils import NormalizedConfigManager from ..utils.import_utils import is_transformers_version @@ -234,7 +235,7 @@ def _from_transformers( # TODO : create ModelPatcher to patch each architecture if config.model_type == "bloom": model.transformer._prepare_attn_mask = _prepare_attn_mask - elif config.model_type == "llama": + elif config.model_type in {"llama", "longllama"}: model.model._prepare_decoder_attention_mask = _prepare_decoder_attention_mask elif config.model_type in {"blenderbot-small", "blenderbot", "opt", "pegasus", "bart"}: model.model.decoder._prepare_decoder_attention_mask = _prepare_decoder_attention_mask From e10b087ad4fb916d1a993763b4d6a7acbca47139 Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 19 Jul 2023 11:41:07 +0400 Subject: [PATCH 12/38] fixes for seq2seq --- optimum/intel/openvino/export.py | 71 ++++++++++++++++--- optimum/intel/openvino/modeling_base.py | 1 + 
.../intel/openvino/modeling_base_seq2seq.py | 1 + optimum/intel/openvino/modeling_decoder.py | 5 ++ 4 files changed, 68 insertions(+), 10 deletions(-) diff --git a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py index d736016cf9..aa370e5f1f 100644 --- a/optimum/intel/openvino/export.py +++ b/optimum/intel/openvino/export.py @@ -26,13 +26,15 @@ logger = logging.getLogger(__name__) if is_torch_available(): - import torch import torch.nn as nn from transformers.modeling_utils import PreTrainedModel <<<<<<< HEAD +<<<<<<< HEAD ======= from transformers.pytorch_utils import is_torch_less_than_1_11 >>>>>>> switch on pytorch frontend +======= +>>>>>>> fixes for seq2seq if is_diffusers_available(): from diffusers import ModelMixin @@ -132,10 +134,14 @@ def export_pytorch( output: Path, device: str = "cpu", input_shapes: Optional[Dict] = None, +<<<<<<< HEAD <<<<<<< HEAD model_kwargs: Optional[Dict[str, Any]] = None, ======= >>>>>>> switch on pytorch frontend +======= + model_kwargs: Optional[Dict[str, Any]] = None, +>>>>>>> fixes for seq2seq ) -> Tuple[List[str], List[str]]: """ Exports a PyTorch model to an ONNX Intermediate Representation. @@ -178,8 +184,13 @@ def export_pytorch( with torch.no_grad(): model.config.return_dict = True +<<<<<<< HEAD model.config.torchscript = True >>>>>>> switch on pytorch frontend +======= + custom_patcher = type(config).patch_model_for_export != OnnxConfig.patch_model_for_export + model.config.torchscript = not custom_patcher +>>>>>>> fixes for seq2seq model.eval() # Check if we need to override certain configuration item @@ -211,17 +222,21 @@ def export_pytorch( input_names = list(inputs.keys()) output_names = list(config.outputs.keys()) <<<<<<< HEAD +<<<<<<< HEAD ======= if hasattr(config, "patch_ops"): config.patch_ops() >>>>>>> switch on pytorch frontend +======= +>>>>>>> fixes for seq2seq if hasattr(model, "forward"): sig = inspect.signature(model.forward) else: sig = inspect.signature(model.call) +<<<<<<< HEAD <<<<<<< HEAD dummy_inputs = remove_none_from_dummy_inputs(dummy_inputs) input_info = get_input_shapes(dummy_inputs, inputs) @@ -259,14 +274,39 @@ def ts_patched_forward(*args, **kwargs): out_tensor.get_tensor().set_names({output_names[idx]}) ======= +======= + dummy_inputs = remove_none_from_dummy_inputs(dummy_inputs) +>>>>>>> fixes for seq2seq input_info = get_input_shapes(dummy_inputs, inputs) start0 = time.perf_counter() - ov_model = mo.convert_model(model, example_input=dummy_inputs, input=input_info) + try: + if custom_patcher: + patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs) + patched_forward = patcher.patched_forward + @functools.wraps(patched_forward) + def ts_patched_forward(*args, **kwargs): + outputs = patched_forward(*args, **kwargs) + return tuple(outputs.values()) + patcher.patched_forward = ts_patched_forward + with patcher: + ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) + else: + ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) + except Exception: + onnx_output = output.with_suffix(".onnx") + input_names, output_names = export_pytorch_to_onnx(model, config, opset, onnx_output, device, input_shapes, model_kwargs) + ov_model = convert_model(onnx_output) + serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) + return input_names, output_names + end0 = time.perf_counter() print(f"Convert model took {end0 - start0}s") ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters 
if param in dummy_inputs} ordered_input_names = list(inputs) flatten_inputs = flattenize_inputs(ordered_dummy_inputs.values()) + for idx, out_tensor in enumerate(ov_model.outputs): + if idx < len(output_names): + out_tensor.get_tensor().set_names({output_names[idx]}) >>>>>>> switch on pytorch frontend for idx, inp_tensor in enumerate(ov_model.inputs): @@ -296,18 +336,11 @@ def clear_class_registry(): static_shape[dim] = -1 inp_tensor.get_node().set_partial_shape(static_shape) inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) - - for idx, out_tensor in enumerate(ov_model.outputs): - if idx < len(output_names): - out_tensor.get_tensor().set_names({output_names[idx]}) ov_model.validate_nodes_and_infer_types() start1 = time.perf_counter() - serialize(ov_model, output.parent / OV_XML_FILE_NAME) + serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) end1 = time.perf_counter() print(f"Serailize model took {end1 - start1}s") - if hasattr(config, "restore_ops"): - config.restore_ops() - return input_names, output_names >>>>>>> switch on pytorch frontend @@ -385,11 +418,16 @@ def export_models( def flattenize_inputs(inputs): flatten_inputs = [] for input_data in inputs: +<<<<<<< HEAD <<<<<<< HEAD if input_data is None: continue ======= >>>>>>> switch on pytorch frontend +======= + if input_data is None: + continue +>>>>>>> fixes for seq2seq if isinstance(input_data, (list, tuple)): flatten_inputs.extend(flattenize_inputs(input_data)) else: @@ -398,11 +436,15 @@ def flattenize_inputs(inputs): <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> fixes for seq2seq def remove_none_from_dummy_inputs(dummy_inputs): def remove_none_from_list_tuple(item): new_item = [i for i in item if i is not None] return type(item)(new_item) +<<<<<<< HEAD upd_dummy = {} for k, v in dummy_inputs.items(): if v is None: @@ -411,18 +453,27 @@ def remove_none_from_list_tuple(item): for kk, vv in v.items(): upd_dummy[kk] = vv continue +======= + upd_dummy = {} + for k, v in dummy_inputs.items(): + if v is None: + continue +>>>>>>> fixes for seq2seq if isinstance(v, (tuple, list)): upd_dummy[k] = remove_none_from_list_tuple(v) continue upd_dummy[k] = v return upd_dummy +<<<<<<< HEAD def get_input_shapes(dummy_inputs, inputs): input_info = [] for input_name, data in dummy_inputs.items(): if isinstance(data, (tuple, list, dict)): ======= +======= +>>>>>>> fixes for seq2seq def get_input_shapes(dummy_inputs, inputs): input_info = [] for input_name, data in dummy_inputs.items(): diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index b5d3ae438b..ec480a18a3 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -17,6 +17,7 @@ from pathlib import Path from tempfile import TemporaryDirectory from typing import Dict, Optional, Union +import time import openvino from huggingface_hub import hf_hub_download diff --git a/optimum/intel/openvino/modeling_base_seq2seq.py b/optimum/intel/openvino/modeling_base_seq2seq.py index 5a5e195845..f80cd58030 100644 --- a/optimum/intel/openvino/modeling_base_seq2seq.py +++ b/optimum/intel/openvino/modeling_base_seq2seq.py @@ -26,6 +26,7 @@ from optimum.exporters import TasksManager from optimum.exporters.onnx import get_encoder_decoder_models_for_export +from .export import export_models from ..utils.import_utils import is_transformers_version from .export import export_models diff --git a/optimum/intel/openvino/modeling_decoder.py 
b/optimum/intel/openvino/modeling_decoder.py index c8a7210642..d7e678f16c 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -17,6 +17,7 @@ from pathlib import Path from tempfile import TemporaryDirectory from typing import Dict, Optional, Tuple, Union +import time import numpy as np import openvino @@ -30,6 +31,7 @@ from optimum.exporters import TasksManager from optimum.exporters.onnx import export from optimum.utils import NormalizedConfigManager +#from optimum.exporters.onnx import export from ..utils.import_utils import is_transformers_version from ..utils.modeling_utils import _prepare_attn_mask, _prepare_decoder_attention_mask @@ -226,7 +228,10 @@ def _from_transformers( "force_download": force_download, "trust_remote_code": trust_remote_code, } + start0 = time.perf_counter() model = TasksManager.get_model_from_task(task, model_id, **model_kwargs) + end0 = time.perf_counter() + print(f"Reading PT model took {end0 - start0}") config.is_decoder = True config.is_encoder_decoder = False onnx_config_constructor = TasksManager.get_exporter_config_constructor(model=model, exporter="onnx", task=task) From c1b73e10d0341065e9f98e5d12c8cb7dab54a48f Mon Sep 17 00:00:00 2001 From: eaidova Date: Fri, 28 Jul 2023 08:33:38 +0400 Subject: [PATCH 13/38] wip --- optimum/intel/openvino/export.py | 170 ++++++++++++++++++++- optimum/intel/openvino/modeling_decoder.py | 3 +- 2 files changed, 166 insertions(+), 7 deletions(-) diff --git a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py index aa370e5f1f..063fb8b467 100644 --- a/optimum/intel/openvino/export.py +++ b/optimum/intel/openvino/export.py @@ -1,24 +1,26 @@ import functools import gc import inspect -import logging import os from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple, Union, Any, Callable +import functools +import time from openvino import PartialShape, convert_model, serialize from openvino.runtime.utils.types import get_element_type from requests.exceptions import ConnectionError as RequestsConnectionError from transformers import AutoTokenizer from transformers.utils import is_tf_available, is_torch_available +from transformers import AutoTokenizer +from optimum.utils import is_diffusers_available, DEFAULT_DUMMY_SHAPES from optimum.exporters import TasksManager -from optimum.exporters.onnx import __main__ from optimum.exporters.onnx.base import OnnxConfig, OnnxConfigWithPast -from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed, export_tensorflow +from optimum.exporters.onnx.convert import export_tensorflow, check_dummy_inputs_are_allowed from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx -from optimum.utils import DEFAULT_DUMMY_SHAPES, is_diffusers_available from optimum.utils.save_utils import maybe_save_preprocessors +from optimum.exporters.onnx import __main__ from .utils import OV_XML_FILE_NAME @@ -62,10 +64,14 @@ def export( opset: Optional[int] = None, device: str = "cpu", input_shapes: Optional[Dict] = None, +<<<<<<< HEAD <<<<<<< HEAD model_kwargs: Optional[Dict[str, Any]] = None, ======= >>>>>>> switch on pytorch frontend +======= + model_kwargs: Optional[Dict[str, Any]] = None, +>>>>>>> wip ) -> Tuple[List[str], List[str]]: """ Exports a Pytorch or TensorFlow model to an ONNX Intermediate Representation. 
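The try/except introduced a couple of hunks back falls back to an ONNX round-trip whenever direct tracing through the PyTorch frontend fails. Reduced to its essentials, the pattern looks like the sketch below; `convert_with_fallback` and its arguments are illustrative names, not part of the exporter.

    from pathlib import Path

    import torch
    from openvino.runtime import serialize
    from openvino.tools.mo import convert_model

    def convert_with_fallback(model: torch.nn.Module, example_input: torch.Tensor, output: Path):
        # Prefer OpenVINO's PyTorch frontend; if tracing fails, export a temporary
        # ONNX file with torch.onnx and convert that file instead.
        try:
            ov_model = convert_model(model, example_input=example_input)
        except Exception:
            onnx_path = output.with_suffix(".onnx")
            torch.onnx.export(model, (example_input,), str(onnx_path))
            ov_model = convert_model(str(onnx_path))
        serialize(ov_model, str(output))
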
@@ -99,6 +105,7 @@ def export( raise ImportError("The pip package `diffusers` is required to export stable diffusion models to ONNX.") if is_torch_available() and isinstance(model, nn.Module): +<<<<<<< HEAD <<<<<<< HEAD return export_pytorch( model, config, opset, output, device=device, input_shapes=input_shapes, model_kwargs=model_kwargs @@ -106,6 +113,9 @@ def export( ======= return export_pytorch(model, config, opset, output, device=device, input_shapes=input_shapes) >>>>>>> switch on pytorch frontend +======= + return export_pytorch(model, config, opset, output, device=device, input_shapes=input_shapes, model_kwargs=model_kwargs) +>>>>>>> wip elif is_tf_available() and issubclass(type(model), TFPreTrainedModel): output.parent.mkdir(parents=True, exist_ok=True) @@ -354,10 +364,14 @@ def export_models( output_names: Optional[List[str]] = None, device: str = "cpu", input_shapes: Optional[Dict] = None, +<<<<<<< HEAD <<<<<<< HEAD model_kwargs: Optional[Dict[str, Any]] = None, ) -> Tuple[List[List[str]], List[List[str]]]: ======= +======= + model_kwargs: Optional[Dict[str, Any]] = None, +>>>>>>> wip ) -> Tuple[List[List[str]], List[List[str]]]: """ Exports a Pytorch or TensorFlow encoder decoder model to an ONNX Intermediate Representation. @@ -404,10 +418,14 @@ def export_models( opset=opset, device=device, input_shapes=input_shapes, +<<<<<<< HEAD <<<<<<< HEAD model_kwargs=model_kwargs, ======= >>>>>>> switch on pytorch frontend +======= + model_kwargs=model_kwargs, +>>>>>>> wip ) ) @@ -458,7 +476,14 @@ def remove_none_from_list_tuple(item): for k, v in dummy_inputs.items(): if v is None: continue +<<<<<<< HEAD >>>>>>> fixes for seq2seq +======= + if isinstance(v, dict): + for kk, vv in v.items(): + upd_dummy[kk] = vv + continue +>>>>>>> wip if isinstance(v, (tuple, list)): upd_dummy[k] = remove_none_from_list_tuple(v) continue @@ -477,8 +502,12 @@ def get_input_shapes(dummy_inputs, inputs): def get_input_shapes(dummy_inputs, inputs): input_info = [] for input_name, data in dummy_inputs.items(): +<<<<<<< HEAD if isinstance(data, (tuple, list)): >>>>>>> switch on pytorch frontend +======= + if isinstance(data, (tuple, list, dict)): +>>>>>>> wip return None static_shape = PartialShape(data.shape) if input_name in inputs: @@ -487,6 +516,9 @@ def get_input_shapes(dummy_inputs, inputs): static_shape[dim] = -1 input_info.append((input_name, static_shape)) <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> wip return input_info @@ -496,6 +528,10 @@ def main_export( task: str = "auto", device: str = "cpu", fp16: Optional[bool] = False, +<<<<<<< HEAD +======= + optimize: Optional[str] = None, +>>>>>>> wip monolith: bool = False, framework: Optional[str] = None, cache_dir: Optional[str] = None, @@ -506,15 +542,127 @@ def main_export( force_download: bool = False, local_files_only: bool = False, use_auth_token: Optional[Union[bool, str]] = None, +<<<<<<< HEAD +======= + for_ort: bool = False, +>>>>>>> wip model_kwargs: Optional[Dict[str, Any]] = None, custom_onnx_configs: Optional[Dict[str, "OnnxConfig"]] = None, fn_get_submodels: Optional[Callable] = None, **kwargs_shapes, ): +<<<<<<< HEAD +======= + """ + Full-suite ONNX export. + + Args: + > Required parameters + + model_name_or_path (`str`): + Model ID on huggingface.co or path on disk to the model repository to export. + output (`Union[str, Path]`): + Path indicating the directory where to store the generated ONNX model. + + > Optional parameters + + task (`Optional[str]`, defaults to `None`): + The task to export the model for. 
If not specified, the task will be auto-inferred based on the model. For decoder models, + use `xxx-with-past` to export the model using past key values in the decoder. + opset (`Optional[int]`, defaults to `None`): + If specified, ONNX opset version to export the model with. Otherwise, the default opset for the given model architecture + will be used. + device (`str`, defaults to `"cpu"`): + The device to use to do the export. Defaults to "cpu". + fp16 (`Optional[bool]`, defaults to `"False"`): + Use half precision during the export. PyTorch-only, requires `device="cuda"`. + optimize (`Optional[str]`, defaults to `None`): + Allows to run ONNX Runtime optimizations directly during the export. Some of these optimizations are specific to + ONNX Runtime, and the resulting ONNX will not be usable with other runtime as OpenVINO or TensorRT. + Available options: `"O1", "O2", "O3", "O4"`. Reference: [`~optimum.onnxruntime.AutoOptimizationConfig`] + monolith (`bool`, defaults to `False`): + Forces to export the model as a single ONNX file. + no_post_process (`bool`, defaults to `False`): + Allows to disable any post-processing done by default on the exported ONNX models. + framework (`Optional[str]`, defaults to `None`): + The framework to use for the ONNX export (`"pt"` or `"tf"`). If not provided, will attempt to automatically detect + the framework for the checkpoint. + atol (`Optional[float]`, defaults to `None`): + If specified, the absolute difference tolerance when validating the model. Otherwise, the default atol for the model will be used. + cache_dir (`Optional[str]`, defaults to `None`): + Path indicating where to store cache. The default Hugging Face cache path will be used by default. + trust_remote_code (`bool`, defaults to `False`): + Allows to use custom code for the modeling hosted in the model repository. This option should only be set for repositories + you trust and in which you have read the code, as it will execute on your local machine arbitrary code present in the + model repository. + pad_token_id (`Optional[int]`, defaults to `None`): + This is needed by some models, for some tasks. If not provided, will attempt to use the tokenizer to guess it. + subfolder (`str`, defaults to `""`): + In case the relevant files are located inside a subfolder of the model repo either locally or on huggingface.co, you can + specify the folder name here. + revision (`str`, defaults to `"main"`): + Revision is the specific model version to use. It can be a branch name, a tag name, or a commit id. + force_download (`bool`, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + local_files_only (`Optional[bool]`, defaults to `False`): + Whether or not to only look at local files (i.e., do not try to download the model). + use_auth_token (`Optional[str]`, defaults to `None`): + The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated + when running `transformers-cli login` (stored in `~/.huggingface`). + model_kwargs (`Optional[Dict[str, Any]]`, defaults to `None`): + Experimental usage: keyword arguments to pass to the model during + the export. This argument should be used along the `custom_onnx_configs` argument + in case, for example, the model inputs/outputs are changed (for example, if + `model_kwargs={"output_attentions": True}` is passed). 
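`main_export` documented here starts from a model id and downloads the checkpoint itself; when the model object is already in memory (as quantization.py does in this series after compression), the lower-level `export()` from this file is called instead. A hedged sketch of that call, with a tiny test checkpoint and an arbitrary output path; the task string may need adjusting to the installed optimum version:

    from pathlib import Path
    from transformers import AutoModelForSequenceClassification
    from optimum.exporters.tasks import TasksManager
    from optimum.intel.openvino.export import export

    model = AutoModelForSequenceClassification.from_pretrained("hf-internal-testing/tiny-random-bert")
    onnx_config_class = TasksManager.get_exporter_config_constructor(
        model=model, exporter="onnx", task="text-classification"
    )
    onnx_config = onnx_config_class(model.config)

    output_dir = Path("ov_bert")
    output_dir.mkdir(parents=True, exist_ok=True)
    # `output` may point at the target .xml file; the exporter writes the IR next to it.
    export(model=model, config=onnx_config, output=output_dir / "openvino_model.xml")
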
+ custom_onnx_configs (`Optional[Dict[str, OnnxConfig]]`, defaults to `None`): + Experimental usage: override the default ONNX config used for the given model. This argument may be useful for advanced users that desire a finer-grained control on the export. An example is available [here](https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model). + fn_get_submodels (`Optional[Callable]`, defaults to `None`): + Experimental usage: Override the default submodels that are used at the export. This is + especially useful when exporting a custom architecture that needs to split the ONNX (e.g. encoder-decoder). If unspecified with custom models, optimum will try to use the default submodels used for the given task, with no guarantee of success. + use_subprocess (`bool`): + Do the ONNX exported model validation in subprocesses. This is especially useful when + exporting on CUDA device, where ORT does not release memory at inference session + destruction. When set to `True`, the `main_export` call should be guarded in + `if __name__ == "__main__":` block. + **kwargs_shapes (`Dict`): + Shapes to use during inference. This argument allows to override the default shapes used during the ONNX export. + + Example usage: + ```python + >>> from optimum.exporters.onnx import main_export + + >>> main_export("gpt2", output="gpt2_onnx/") + ``` + """ + if optimize == "O4" and device != "cuda": + raise ValueError( + "Requested O4 optimization, but this optimization requires to do the export on GPU." + " Please pass the argument `--device cuda`." + ) + + if (framework == "tf" and fp16 is True) or not is_torch_available(): + raise ValueError("The --fp16 option is supported only for PyTorch.") + + if fp16 is True and device == "cpu": + raise ValueError( + "The --fp16 option is supported only when exporting on GPU. Please pass the option `--device cuda`." + ) + +>>>>>>> wip output = Path(output) if not output.exists(): output.mkdir(parents=True) +<<<<<<< HEAD +======= + if for_ort: + logger.warning( + "The option --for-ort was passed, but its behavior is now the default in the ONNX exporter" + " and passing it is not required anymore." + ) + +>>>>>>> wip original_task = task task = TasksManager.map_from_synonym(task) @@ -590,7 +738,11 @@ def main_export( if original_task == "auto": # Make -with-past the default if --task was not explicitely specified task = task + "-with-past" else: +<<<<<<< HEAD logger.info( +======= + print( +>>>>>>> wip f"The task `{task}` was manually specified, and past key values will not be reused in the decoding." f" if needed, please pass `--task {task}-with-past` to export using the past key values." 
) @@ -609,7 +761,11 @@ def main_export( possible_synonyms = f" (possible synonyms are: {synonyms_for_task})" else: possible_synonyms = "" +<<<<<<< HEAD logger.info(f"Automatic task detection to {task}{possible_synonyms}.") +======= + print(f"Automatic task detection to {task}{possible_synonyms}.") +>>>>>>> wip onnx_config, models_and_onnx_configs = __main__._get_submodels_and_onnx_configs( model=model, @@ -687,7 +843,11 @@ def main_export( input_shapes=input_shapes, device=device, model_kwargs=model_kwargs, +<<<<<<< HEAD ) ======= return input_info >>>>>>> switch on pytorch frontend +======= + ) +>>>>>>> wip diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index d7e678f16c..3dcd74722a 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -29,15 +29,14 @@ from transformers.modeling_outputs import CausalLMOutputWithPast from optimum.exporters import TasksManager -from optimum.exporters.onnx import export from optimum.utils import NormalizedConfigManager -#from optimum.exporters.onnx import export from ..utils.import_utils import is_transformers_version from ..utils.modeling_utils import _prepare_attn_mask, _prepare_decoder_attention_mask from .export import export, is_torch_model from .modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, STR_TO_OV_TYPE +from .export import export, is_torch_model if is_transformers_version("<", "4.25.0"): From 042dd530b5f716094a561de60c325307847ac9b8 Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 2 Aug 2023 13:57:17 +0400 Subject: [PATCH 14/38] cleanup --- optimum/intel/openvino/export.py | 210 ++------------------- optimum/intel/openvino/modeling_decoder.py | 4 - optimum/intel/openvino/quantization.py | 1 + 3 files changed, 15 insertions(+), 200 deletions(-) diff --git a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py index 063fb8b467..c10c9d985f 100644 --- a/optimum/intel/openvino/export.py +++ b/optimum/intel/openvino/export.py @@ -1,11 +1,9 @@ -import functools -import gc +import logging import inspect import os from pathlib import Path from typing import Dict, List, Optional, Tuple, Union, Any, Callable import functools -import time from openvino import PartialShape, convert_model, serialize from openvino.runtime.utils.types import get_element_type @@ -22,21 +20,16 @@ from optimum.utils.save_utils import maybe_save_preprocessors from optimum.exporters.onnx import __main__ +from openvino.tools.mo import convert_model +from openvino.runtime import serialize, PartialShape +from openvino.runtime.utils.types import get_element_type from .utils import OV_XML_FILE_NAME - logger = logging.getLogger(__name__) if is_torch_available(): import torch.nn as nn from transformers.modeling_utils import PreTrainedModel -<<<<<<< HEAD -<<<<<<< HEAD -======= - from transformers.pytorch_utils import is_torch_less_than_1_11 ->>>>>>> switch on pytorch frontend -======= ->>>>>>> fixes for seq2seq if is_diffusers_available(): from diffusers import ModelMixin @@ -44,19 +37,12 @@ if is_tf_available(): from transformers.modeling_tf_utils import TFPreTrainedModel -<<<<<<< HEAD - -======= ->>>>>>> switch on pytorch frontend def is_torch_model(model): if not is_torch_available(): return False return isinstance(model, nn.Module) -<<<<<<< HEAD -======= ->>>>>>> switch on pytorch frontend def export( model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], config: 
OnnxConfig, @@ -64,14 +50,7 @@ def export( opset: Optional[int] = None, device: str = "cpu", input_shapes: Optional[Dict] = None, -<<<<<<< HEAD -<<<<<<< HEAD - model_kwargs: Optional[Dict[str, Any]] = None, -======= ->>>>>>> switch on pytorch frontend -======= model_kwargs: Optional[Dict[str, Any]] = None, ->>>>>>> wip ) -> Tuple[List[str], List[str]]: """ Exports a Pytorch or TensorFlow model to an ONNX Intermediate Representation. @@ -105,17 +84,9 @@ def export( raise ImportError("The pip package `diffusers` is required to export stable diffusion models to ONNX.") if is_torch_available() and isinstance(model, nn.Module): -<<<<<<< HEAD -<<<<<<< HEAD return export_pytorch( model, config, opset, output, device=device, input_shapes=input_shapes, model_kwargs=model_kwargs ) -======= - return export_pytorch(model, config, opset, output, device=device, input_shapes=input_shapes) ->>>>>>> switch on pytorch frontend -======= - return export_pytorch(model, config, opset, output, device=device, input_shapes=input_shapes, model_kwargs=model_kwargs) ->>>>>>> wip elif is_tf_available() and issubclass(type(model), TFPreTrainedModel): output.parent.mkdir(parents=True, exist_ok=True) @@ -124,11 +95,7 @@ def export( if device == "cuda": raise RuntimeError("`tf2onnx` does not support export on CUDA device.") if input_shapes is not None: -<<<<<<< HEAD logger.info("`input_shapes` argument is not supported by the Tensorflow ONNX export and will be ignored.") -======= - print("`input_shapes` argument is not supported by the Tensorflow ONNX export and will be ignored.") ->>>>>>> switch on pytorch frontend return export_tensorflow(model, config, opset, output) else: @@ -144,14 +111,7 @@ def export_pytorch( output: Path, device: str = "cpu", input_shapes: Optional[Dict] = None, -<<<<<<< HEAD -<<<<<<< HEAD - model_kwargs: Optional[Dict[str, Any]] = None, -======= ->>>>>>> switch on pytorch frontend -======= model_kwargs: Optional[Dict[str, Any]] = None, ->>>>>>> fixes for seq2seq ) -> Tuple[List[str], List[str]]: """ Exports a PyTorch model to an ONNX Intermediate Representation. @@ -176,7 +136,6 @@ def export_pytorch( the ONNX configuration. 
""" import torch -<<<<<<< HEAD from torch.utils._pytree import tree_map logger.info(f"Using framework PyTorch: {torch.__version__}") @@ -186,34 +145,22 @@ def export_pytorch( model.config.return_dict = True custom_patcher = type(config).patch_model_for_export != OnnxConfig.patch_model_for_export model.config.torchscript = not custom_patcher -======= - from torch.onnx import export as onnx_export from torch.utils._pytree import tree_map - print(f"Using framework PyTorch: {torch.__version__}") + logger.info(f"Using framework PyTorch: {torch.__version__}") + output = Path(output) with torch.no_grad(): model.config.return_dict = True -<<<<<<< HEAD - model.config.torchscript = True ->>>>>>> switch on pytorch frontend -======= custom_patcher = type(config).patch_model_for_export != OnnxConfig.patch_model_for_export model.config.torchscript = not custom_patcher ->>>>>>> fixes for seq2seq model.eval() # Check if we need to override certain configuration item if config.values_override is not None: -<<<<<<< HEAD logger.info(f"Overriding {len(config.values_override)} configuration item(s)") for override_config_key, override_config_value in config.values_override.items(): logger.info(f"\t- {override_config_key} -> {override_config_value}") -======= - print(f"Overriding {len(config.values_override)} configuration item(s)") - for override_config_key, override_config_value in config.values_override.items(): - print(f"\t- {override_config_key} -> {override_config_value}") ->>>>>>> switch on pytorch frontend setattr(model.config, override_config_key, override_config_value) if input_shapes is None: @@ -231,23 +178,10 @@ def export_pytorch( inputs = config.ordered_inputs(model) input_names = list(inputs.keys()) output_names = list(config.outputs.keys()) -<<<<<<< HEAD -<<<<<<< HEAD -======= - - if hasattr(config, "patch_ops"): - config.patch_ops() - ->>>>>>> switch on pytorch frontend -======= ->>>>>>> fixes for seq2seq if hasattr(model, "forward"): sig = inspect.signature(model.forward) else: sig = inspect.signature(model.call) - -<<<<<<< HEAD -<<<<<<< HEAD dummy_inputs = remove_none_from_dummy_inputs(dummy_inputs) input_info = get_input_shapes(dummy_inputs, inputs) try: @@ -283,42 +217,39 @@ def ts_patched_forward(*args, **kwargs): if idx < len(output_names): out_tensor.get_tensor().set_names({output_names[idx]}) -======= -======= - dummy_inputs = remove_none_from_dummy_inputs(dummy_inputs) ->>>>>>> fixes for seq2seq input_info = get_input_shapes(dummy_inputs, inputs) - start0 = time.perf_counter() try: if custom_patcher: patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs) patched_forward = patcher.patched_forward + @functools.wraps(patched_forward) def ts_patched_forward(*args, **kwargs): outputs = patched_forward(*args, **kwargs) return tuple(outputs.values()) + patcher.patched_forward = ts_patched_forward with patcher: ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) else: ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) except Exception: + model.config.torchscript = False + model.config.return_dict = True onnx_output = output.with_suffix(".onnx") - input_names, output_names = export_pytorch_to_onnx(model, config, opset, onnx_output, device, input_shapes, model_kwargs) + input_names, output_names = export_pytorch_to_onnx( + model, config, opset, onnx_output, device, input_shapes, model_kwargs + ) ov_model = convert_model(onnx_output) serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) 
- return input_names, output_names + return input_names, output_names, True - end0 = time.perf_counter() - print(f"Convert model took {end0 - start0}s") ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} ordered_input_names = list(inputs) flatten_inputs = flattenize_inputs(ordered_dummy_inputs.values()) for idx, out_tensor in enumerate(ov_model.outputs): if idx < len(output_names): out_tensor.get_tensor().set_names({output_names[idx]}) - ->>>>>>> switch on pytorch frontend for idx, inp_tensor in enumerate(ov_model.inputs): input_name = ordered_input_names[idx] inp_tensor.get_tensor().set_names({input_name}) @@ -327,7 +258,6 @@ def ts_patched_forward(*args, **kwargs): dims = inputs[input_name] for dim in dims: -<<<<<<< HEAD static_shape[dim] = -1 inp_tensor.get_node().set_partial_shape(static_shape) inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) @@ -342,18 +272,6 @@ def clear_class_registry(): torch._C._jit_clear_class_registry() torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() torch.jit._state._clear_class_state() -======= - static_shape[dim] = -1 - inp_tensor.get_node().set_partial_shape(static_shape) - inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) - ov_model.validate_nodes_and_infer_types() - start1 = time.perf_counter() - serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) - end1 = time.perf_counter() - print(f"Serailize model took {end1 - start1}s") - return input_names, output_names ->>>>>>> switch on pytorch frontend - def export_models( models_and_onnx_configs: Dict[ @@ -364,14 +282,7 @@ def export_models( output_names: Optional[List[str]] = None, device: str = "cpu", input_shapes: Optional[Dict] = None, -<<<<<<< HEAD -<<<<<<< HEAD model_kwargs: Optional[Dict[str, Any]] = None, -) -> Tuple[List[List[str]], List[List[str]]]: -======= -======= - model_kwargs: Optional[Dict[str, Any]] = None, ->>>>>>> wip ) -> Tuple[List[List[str]], List[List[str]]]: """ Exports a Pytorch or TensorFlow encoder decoder model to an ONNX Intermediate Representation. @@ -397,7 +308,6 @@ def export_models( `Tuple[List[List[str]], List[List[str]]]`: A tuple with an ordered list of the model's inputs, and the named inputs from the ONNX configuration. 
""" ->>>>>>> switch on pytorch frontend outputs = [] if output_names is not None and len(output_names) != len(models_and_onnx_configs): @@ -418,14 +328,7 @@ def export_models( opset=opset, device=device, input_shapes=input_shapes, -<<<<<<< HEAD -<<<<<<< HEAD - model_kwargs=model_kwargs, -======= ->>>>>>> switch on pytorch frontend -======= model_kwargs=model_kwargs, ->>>>>>> wip ) ) @@ -436,16 +339,8 @@ def export_models( def flattenize_inputs(inputs): flatten_inputs = [] for input_data in inputs: -<<<<<<< HEAD -<<<<<<< HEAD if input_data is None: continue -======= ->>>>>>> switch on pytorch frontend -======= - if input_data is None: - continue ->>>>>>> fixes for seq2seq if isinstance(input_data, (list, tuple)): flatten_inputs.extend(flattenize_inputs(input_data)) else: @@ -453,16 +348,11 @@ def flattenize_inputs(inputs): return flatten_inputs -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> fixes for seq2seq def remove_none_from_dummy_inputs(dummy_inputs): def remove_none_from_list_tuple(item): new_item = [i for i in item if i is not None] return type(item)(new_item) -<<<<<<< HEAD upd_dummy = {} for k, v in dummy_inputs.items(): if v is None: @@ -471,43 +361,17 @@ def remove_none_from_list_tuple(item): for kk, vv in v.items(): upd_dummy[kk] = vv continue -======= - upd_dummy = {} - for k, v in dummy_inputs.items(): - if v is None: - continue -<<<<<<< HEAD ->>>>>>> fixes for seq2seq -======= - if isinstance(v, dict): - for kk, vv in v.items(): - upd_dummy[kk] = vv - continue ->>>>>>> wip if isinstance(v, (tuple, list)): upd_dummy[k] = remove_none_from_list_tuple(v) continue upd_dummy[k] = v return upd_dummy -<<<<<<< HEAD def get_input_shapes(dummy_inputs, inputs): input_info = [] for input_name, data in dummy_inputs.items(): if isinstance(data, (tuple, list, dict)): -======= -======= ->>>>>>> fixes for seq2seq -def get_input_shapes(dummy_inputs, inputs): - input_info = [] - for input_name, data in dummy_inputs.items(): -<<<<<<< HEAD - if isinstance(data, (tuple, list)): ->>>>>>> switch on pytorch frontend -======= - if isinstance(data, (tuple, list, dict)): ->>>>>>> wip return None static_shape = PartialShape(data.shape) if input_name in inputs: @@ -515,10 +379,6 @@ def get_input_shapes(dummy_inputs, inputs): for dim in dynamic_dims: static_shape[dim] = -1 input_info.append((input_name, static_shape)) -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> wip return input_info @@ -528,10 +388,6 @@ def main_export( task: str = "auto", device: str = "cpu", fp16: Optional[bool] = False, -<<<<<<< HEAD -======= - optimize: Optional[str] = None, ->>>>>>> wip monolith: bool = False, framework: Optional[str] = None, cache_dir: Optional[str] = None, @@ -542,17 +398,11 @@ def main_export( force_download: bool = False, local_files_only: bool = False, use_auth_token: Optional[Union[bool, str]] = None, -<<<<<<< HEAD -======= - for_ort: bool = False, ->>>>>>> wip model_kwargs: Optional[Dict[str, Any]] = None, custom_onnx_configs: Optional[Dict[str, "OnnxConfig"]] = None, fn_get_submodels: Optional[Callable] = None, **kwargs_shapes, ): -<<<<<<< HEAD -======= """ Full-suite ONNX export. @@ -635,12 +485,6 @@ def main_export( >>> main_export("gpt2", output="gpt2_onnx/") ``` """ - if optimize == "O4" and device != "cuda": - raise ValueError( - "Requested O4 optimization, but this optimization requires to do the export on GPU." - " Please pass the argument `--device cuda`." 
- ) - if (framework == "tf" and fp16 is True) or not is_torch_available(): raise ValueError("The --fp16 option is supported only for PyTorch.") @@ -649,20 +493,9 @@ def main_export( "The --fp16 option is supported only when exporting on GPU. Please pass the option `--device cuda`." ) ->>>>>>> wip output = Path(output) if not output.exists(): output.mkdir(parents=True) - -<<<<<<< HEAD -======= - if for_ort: - logger.warning( - "The option --for-ort was passed, but its behavior is now the default in the ONNX exporter" - " and passing it is not required anymore." - ) - ->>>>>>> wip original_task = task task = TasksManager.map_from_synonym(task) @@ -738,11 +571,7 @@ def main_export( if original_task == "auto": # Make -with-past the default if --task was not explicitely specified task = task + "-with-past" else: -<<<<<<< HEAD logger.info( -======= - print( ->>>>>>> wip f"The task `{task}` was manually specified, and past key values will not be reused in the decoding." f" if needed, please pass `--task {task}-with-past` to export using the past key values." ) @@ -761,11 +590,7 @@ def main_export( possible_synonyms = f" (possible synonyms are: {synonyms_for_task})" else: possible_synonyms = "" -<<<<<<< HEAD logger.info(f"Automatic task detection to {task}{possible_synonyms}.") -======= - print(f"Automatic task detection to {task}{possible_synonyms}.") ->>>>>>> wip onnx_config, models_and_onnx_configs = __main__._get_submodels_and_onnx_configs( model=model, @@ -843,11 +668,4 @@ def main_export( input_shapes=input_shapes, device=device, model_kwargs=model_kwargs, -<<<<<<< HEAD - ) -======= - return input_info ->>>>>>> switch on pytorch frontend -======= ) ->>>>>>> wip diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index 3dcd74722a..b60564c382 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -17,7 +17,6 @@ from pathlib import Path from tempfile import TemporaryDirectory from typing import Dict, Optional, Tuple, Union -import time import numpy as np import openvino @@ -227,10 +226,7 @@ def _from_transformers( "force_download": force_download, "trust_remote_code": trust_remote_code, } - start0 = time.perf_counter() model = TasksManager.get_model_from_task(task, model_id, **model_kwargs) - end0 = time.perf_counter() - print(f"Reading PT model took {end0 - start0}") config.is_decoder = True config.is_encoder_decoder = False onnx_config_constructor = TasksManager.get_exporter_config_constructor(model=model, exporter="onnx", task=task) diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index 311ad211f9..fdbd12dd60 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -34,6 +34,7 @@ from transformers import DataCollator, PreTrainedModel, default_data_collator from optimum.exporters.onnx import export as onnx_export +from .export import export from optimum.exporters.tasks import TasksManager from optimum.quantization_base import OptimumQuantizer From 1dd4478970c88936164cf9f8cab4268a4faa4d9a Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 2 Aug 2023 14:23:41 +0400 Subject: [PATCH 15/38] fix style --- optimum/intel/openvino/export.py | 18 +++++++++--------- optimum/intel/openvino/modeling_base.py | 3 +-- .../intel/openvino/modeling_base_seq2seq.py | 1 - optimum/intel/openvino/modeling_decoder.py | 1 - optimum/intel/openvino/quantization.py | 1 - 5 files changed, 10 insertions(+), 14 deletions(-) diff --git 
a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py index c10c9d985f..4937e09946 100644 --- a/optimum/intel/openvino/export.py +++ b/optimum/intel/openvino/export.py @@ -1,9 +1,9 @@ -import logging +import functools import inspect +import logging import os from pathlib import Path -from typing import Dict, List, Optional, Tuple, Union, Any, Callable -import functools +from typing import Any, Callable, Dict, List, Optional, Tuple, Union from openvino import PartialShape, convert_model, serialize from openvino.runtime.utils.types import get_element_type @@ -11,23 +11,23 @@ from transformers import AutoTokenizer from transformers.utils import is_tf_available, is_torch_available from transformers import AutoTokenizer +from transformers.utils import is_tf_available, is_torch_available -from optimum.utils import is_diffusers_available, DEFAULT_DUMMY_SHAPES from optimum.exporters import TasksManager +from optimum.exporters.onnx import __main__ from optimum.exporters.onnx.base import OnnxConfig, OnnxConfigWithPast -from optimum.exporters.onnx.convert import export_tensorflow, check_dummy_inputs_are_allowed +from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed, export_tensorflow from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx +from optimum.utils import DEFAULT_DUMMY_SHAPES, is_diffusers_available from optimum.utils.save_utils import maybe_save_preprocessors -from optimum.exporters.onnx import __main__ -from openvino.tools.mo import convert_model -from openvino.runtime import serialize, PartialShape -from openvino.runtime.utils.types import get_element_type from .utils import OV_XML_FILE_NAME + logger = logging.getLogger(__name__) if is_torch_available(): + import torch import torch.nn as nn from transformers.modeling_utils import PreTrainedModel diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index ec480a18a3..ef14f8c340 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -17,14 +17,13 @@ from pathlib import Path from tempfile import TemporaryDirectory from typing import Dict, Optional, Union -import time import openvino from huggingface_hub import hf_hub_download from openvino import Core, convert_model from openvino._offline_transformations import apply_moc_transformations, compress_model_transformation -from openvino.tools import mo from openvino.runtime import Core +from openvino.tools import mo from transformers import PretrainedConfig from transformers.file_utils import add_start_docstrings diff --git a/optimum/intel/openvino/modeling_base_seq2seq.py b/optimum/intel/openvino/modeling_base_seq2seq.py index f80cd58030..5a5e195845 100644 --- a/optimum/intel/openvino/modeling_base_seq2seq.py +++ b/optimum/intel/openvino/modeling_base_seq2seq.py @@ -26,7 +26,6 @@ from optimum.exporters import TasksManager from optimum.exporters.onnx import get_encoder_decoder_models_for_export -from .export import export_models from ..utils.import_utils import is_transformers_version from .export import export_models diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index b60564c382..7cc2b34f2c 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -35,7 +35,6 @@ from .export import export, is_torch_model from .modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel from .utils import ONNX_WEIGHTS_NAME, 
OV_XML_FILE_NAME, STR_TO_OV_TYPE -from .export import export, is_torch_model if is_transformers_version("<", "4.25.0"): diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index fdbd12dd60..311ad211f9 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -34,7 +34,6 @@ from transformers import DataCollator, PreTrainedModel, default_data_collator from optimum.exporters.onnx import export as onnx_export -from .export import export from optimum.exporters.tasks import TasksManager from optimum.quantization_base import OptimumQuantizer From 13d71b23736e8ab4d662adcb7bff9030f40fe32b Mon Sep 17 00:00:00 2001 From: Ekaterina Aidova Date: Wed, 2 Aug 2023 14:14:43 +0400 Subject: [PATCH 16/38] revert changes not related to pr --- optimum/intel/openvino/modeling_decoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index 7cc2b34f2c..b5cbc2ba48 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -234,7 +234,7 @@ def _from_transformers( # TODO : create ModelPatcher to patch each architecture if config.model_type == "bloom": model.transformer._prepare_attn_mask = _prepare_attn_mask - elif config.model_type in {"llama", "longllama"}: + elif config.model_type == "llama": model.model._prepare_decoder_attention_mask = _prepare_decoder_attention_mask elif config.model_type in {"blenderbot-small", "blenderbot", "opt", "pegasus", "bart"}: model.model.decoder._prepare_decoder_attention_mask = _prepare_decoder_attention_mask From 891ec4aeb66c698db32e4a7f0a0b62558efc1666 Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 2 Aug 2023 16:19:35 +0400 Subject: [PATCH 17/38] clear ts registry: --- optimum/intel/openvino/export.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py index 4937e09946..e82d3a66c6 100644 --- a/optimum/intel/openvino/export.py +++ b/optimum/intel/openvino/export.py @@ -1,4 +1,5 @@ import functools +import gc import inspect import logging import os @@ -273,6 +274,7 @@ def clear_class_registry(): torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() torch.jit._state._clear_class_state() + def export_models( models_and_onnx_configs: Dict[ str, Tuple[Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], "OnnxConfig"] From d24a509873eeb01f1f0317462ed56f3189d53f74 Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 2 Aug 2023 18:00:44 +0400 Subject: [PATCH 18/38] remove ov dev from deps --- optimum/intel/openvino/export.py | 6 +++--- optimum/intel/openvino/modeling_base.py | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py index e82d3a66c6..218eeb235f 100644 --- a/optimum/intel/openvino/export.py +++ b/optimum/intel/openvino/export.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from openvino import PartialShape, convert_model, serialize +from openvino import PartialShape, convert_model, save_model from openvino.runtime.utils.types import get_element_type from requests.exceptions import ConnectionError as RequestsConnectionError from transformers import AutoTokenizer @@ -208,7 +208,7 @@ def ts_patched_forward(*args, **kwargs): model, config, opset, onnx_output, device, input_shapes, model_kwargs ) 
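Note: the import switch above (and its revert in the following patch, "return serialize back") toggles between `openvino.runtime.serialize` and the newer `openvino.save_model`. The relevant difference here is that `save_model` compresses FP32 weights to FP16 by default, which is why the series later settles on `save_model` with an explicit `compress_to_fp16=False`. A short sketch, assuming `ov_model` is an already converted OpenVINO model:

```python
# Sketch: ov_model is assumed to come from openvino.convert_model(...).
from openvino import save_model, serialize

serialize(ov_model, "model.xml")                           # writes weights as-is
save_model(ov_model, "model.xml", compress_to_fp16=False)  # equivalent, flag made explicit
save_model(ov_model, "model.xml")                          # default compresses FP32 weights to FP16
```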
ov_model = convert_model(onnx_output) - serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) + save_model(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) return input_names, output_names, True clear_class_registry() ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} @@ -263,7 +263,7 @@ def ts_patched_forward(*args, **kwargs): inp_tensor.get_node().set_partial_shape(static_shape) inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) ov_model.validate_nodes_and_infer_types() - serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) + save_model(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) del model gc.collect() return input_names, output_names, False diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index ef14f8c340..4c99bef09f 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -22,8 +22,6 @@ from huggingface_hub import hf_hub_download from openvino import Core, convert_model from openvino._offline_transformations import apply_moc_transformations, compress_model_transformation -from openvino.runtime import Core -from openvino.tools import mo from transformers import PretrainedConfig from transformers.file_utils import add_start_docstrings From 740351b569d65a5db0e2d1c99c222a4f4926ef7c Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 2 Aug 2023 18:46:46 +0400 Subject: [PATCH 19/38] return serialize back --- optimum/intel/openvino/export.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py index 218eeb235f..e82d3a66c6 100644 --- a/optimum/intel/openvino/export.py +++ b/optimum/intel/openvino/export.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from openvino import PartialShape, convert_model, save_model +from openvino import PartialShape, convert_model, serialize from openvino.runtime.utils.types import get_element_type from requests.exceptions import ConnectionError as RequestsConnectionError from transformers import AutoTokenizer @@ -208,7 +208,7 @@ def ts_patched_forward(*args, **kwargs): model, config, opset, onnx_output, device, input_shapes, model_kwargs ) ov_model = convert_model(onnx_output) - save_model(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) + serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) return input_names, output_names, True clear_class_registry() ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} @@ -263,7 +263,7 @@ def ts_patched_forward(*args, **kwargs): inp_tensor.get_node().set_partial_shape(static_shape) inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) ov_model.validate_nodes_and_infer_types() - save_model(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) + serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) del model gc.collect() return input_names, output_names, False From eaab783c21094c9fa43efb72806a6f32b7f7052a Mon Sep 17 00:00:00 2001 From: Alexander Date: Thu, 3 Aug 2023 11:01:33 +0400 Subject: [PATCH 20/38] Added weights compression --- 
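Note: the "Added weights compression" patch below drops the dummy-dataset path that previously fed NNCF's `create_compressed_model` for weight-only quantization and instead calls `nncf.compress_weights` on the loaded PyTorch model, registering transformers' `Conv1D` so GPT-2-style layers are covered. A minimal sketch of that API using one of the tiny test models referenced in the patch; treat it as an illustration rather than the exact quantizer flow:

```python
# Sketch: weights-only INT8 compression with NNCF before the OpenVINO export.
from nncf import compress_weights
from nncf.torch import register_module
from transformers import AutoModelForCausalLM
from transformers.pytorch_utils import Conv1D

register_module(ignored_algorithms=[])(Conv1D)  # let NNCF see GPT-2's Conv1D layers

model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
model = compress_weights(model)  # still a torch.nn.Module, ready for the usual export path
```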
optimum/intel/openvino/quantization.py | 54 +++++++++++++------------- tests/openvino/test_quantization.py | 7 ++-- 2 files changed, 29 insertions(+), 32 deletions(-) diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index 311ad211f9..388f2cdfdf 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -24,14 +24,15 @@ import transformers from accelerate.data_loader import DataLoaderStateMixin from datasets import Dataset, load_dataset -from nncf import NNCFConfig -from nncf.torch import create_compressed_model, register_default_init_args +from nncf import NNCFConfig, compress_weights +from nncf.torch import create_compressed_model, register_default_init_args, register_module from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_inputs_with_objwalk from nncf.torch.initialization import PTInitializingDataLoader from openvino._offline_transformations import compress_quantize_weights_transformation from openvino.runtime import Core, Tensor from torch.utils.data import DataLoader, RandomSampler, TensorDataset from transformers import DataCollator, PreTrainedModel, default_data_collator +from transformers.pytorch_utils import Conv1D from optimum.exporters.onnx import export as onnx_export from optimum.exporters.tasks import TasksManager @@ -50,6 +51,8 @@ ) +register_module(ignored_algorithms=[])(Conv1D) + core = Core() logger = logging.getLogger(__name__) @@ -345,36 +348,31 @@ def _quantize_torchmodel( model_type=model_type, ) - if weights_only: - calibration_dataset = TensorDataset(torch.tensor([0.0, 1.0])) - calibration_dataset.column_names = [] - remove_unused_columns = False - onnx_config = onnx_config_class(self.model.config) - - def data_collator(batch): - return onnx_config.generate_dummy_inputs(framework="pt") - - calibration_dataloader = self._get_calibration_dataloader( - calibration_dataset=calibration_dataset, - batch_size=batch_size, - remove_unused_columns=remove_unused_columns, - data_collator=data_collator, - ) - if quantization_config is None: logger.info( "No configuration describing the quantization process was provided, a default OVConfig will be generated." 
) - quantization_config = OVConfig(compression=INT8_WEIGHT_COMPRESSION_CONFIG) if weights_only else OVConfig() - - model_inputs = next(iter(calibration_dataloader)) - quantization_config.add_input_info(model_inputs) - nncf_config = NNCFConfig.from_dict(quantization_config.__dict__) - nncf_config = register_default_init_args(nncf_config, calibration_dataloader) - controller, compressed_model = create_compressed_model( - self.model, nncf_config, wrap_inputs_fn=wrap_nncf_model_inputs_with_objwalk - ) - compressed_model = controller.strip(do_copy=False) + quantization_config = OVConfig() + + if weights_only: + compressed_model = compress_weights(self.model) + self.model = compressed_model + else: + calibration_dataloader = self._get_calibration_dataloader( + calibration_dataset=calibration_dataset, + batch_size=batch_size, + remove_unused_columns=remove_unused_columns, + data_collator=data_collator, + ) + + model_inputs = next(iter(calibration_dataloader)) + quantization_config.add_input_info(model_inputs) + nncf_config = NNCFConfig.from_dict(quantization_config.__dict__) + nncf_config = register_default_init_args(nncf_config, calibration_dataloader) + controller, compressed_model = create_compressed_model( + self.model, nncf_config, wrap_inputs_fn=wrap_nncf_model_inputs_with_objwalk + ) + compressed_model = controller.strip(do_copy=False) task = self.task model = self.model diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 4a2f96447c..51dfe98507 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -146,8 +146,8 @@ def preprocess_function(examples, tokenizer): class OVWeightCompressionTest(unittest.TestCase): # TODO : add models SUPPORTED_ARCHITECTURES_WITH_EXPECTED_COMPRESSED_MATMULS = ( - (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 35), - (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 5), + (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 70), + (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 45), ) @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_COMPRESSED_MATMULS) @@ -173,9 +173,8 @@ def test_automodel_weight_compression(self, model_cls, model_name, expected_int8 self.assertTrue("logits" in outputs) # Verify that that the configuration is correctly saved and loaded - expected_config = OVConfig(compression=INT8_WEIGHT_COMPRESSION_CONFIG) loaded_config = OVConfig.from_pretrained(tmp_dir) - self.assertEqual(expected_config.to_dict()["compression"], loaded_config.to_dict()["compression"]) + self.assertIsNotNone(loaded_config) class OVQuantizerQATest(unittest.TestCase): From 4d8628465c9ddb9bde0fb23d9b19ced10faef499 Mon Sep 17 00:00:00 2001 From: Alexander Date: Thu, 3 Aug 2023 11:12:43 +0400 Subject: [PATCH 21/38] Changed NNCF version to develop --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7bdb9c062e..020d4e8826 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ "onnxruntime<1.15.0", ], "openvino": ["openvino==2023.1.0.dev20230728", "onnx", "onnxruntime"], - "nncf": ["nncf>=2.5.0"], + "nncf": ["git+https://github.com/openvinotoolkit/nncf.git"], "ipex": ["transformers<4.32.0", "intel-extension-for-pytorch", "onnx"], "diffusers": ["diffusers", "invisible-watermark>=0.2.0"], "quality": QUALITY_REQUIRE, From 69794aeb1742a5d10bdafc9c583e4fbe52edcc0c Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 9 Aug 2023 22:01:57 +0400 Subject: [PATCH 
22/38] resolve dictionary as input --- optimum/exporters/openvino/__main__.py | 227 ++++++++++++++ optimum/exporters/openvino/convert.py | 290 ++++++++++++++++++ optimum/exporters/openvino/utils.py | 81 +++++ optimum/intel/openvino/modeling_base.py | 4 +- .../intel/openvino/modeling_base_seq2seq.py | 2 +- optimum/intel/openvino/modeling_decoder.py | 2 +- optimum/intel/openvino/modeling_diffusion.py | 2 +- optimum/intel/openvino/quantization.py | 2 +- optimum/intel/openvino/trainer.py | 2 +- 9 files changed, 605 insertions(+), 7 deletions(-) create mode 100644 optimum/exporters/openvino/__main__.py create mode 100644 optimum/exporters/openvino/convert.py create mode 100644 optimum/exporters/openvino/utils.py diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py new file mode 100644 index 0000000000..2c3428aa0c --- /dev/null +++ b/optimum/exporters/openvino/__main__.py @@ -0,0 +1,227 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +from pathlib import Path +from typing import Any, Callable, Dict, Optional, Union + +from requests.exceptions import ConnectionError as RequestsConnectionError +from transformers import AutoTokenizer +from transformers.utils import is_torch_available + +from optimum.exporters import TasksManager +from optimum.exporters.onnx import __main__ as optimum_main +from optimum.exporters.onnx.base import OnnxConfig, OnnxConfigWithPast +from optimum.utils import DEFAULT_DUMMY_SHAPES +from optimum.utils.save_utils import maybe_save_preprocessors + +from ...intel.openvino.utils import OV_XML_FILE_NAME +from .convert import export_models + + +logger = logging.getLogger(__name__) + +if is_torch_available(): + import torch + + +def main_export( + model_name_or_path: str, + output: Union[str, Path], + task: str = "auto", + device: str = "cpu", + fp16: Optional[bool] = False, + framework: Optional[str] = None, + cache_dir: Optional[str] = None, + trust_remote_code: bool = False, + pad_token_id: Optional[int] = None, + subfolder: str = "", + revision: str = "main", + force_download: bool = False, + local_files_only: bool = False, + use_auth_token: Optional[Union[bool, str]] = None, + model_kwargs: Optional[Dict[str, Any]] = None, + custom_onnx_configs: Optional[Dict[str, "OnnxConfig"]] = None, + fn_get_submodels: Optional[Callable] = None, + **kwargs_shapes, +): + output = Path(output) + if not output.exists(): + output.mkdir(parents=True) + + original_task = task + task = TasksManager.map_from_synonym(task) + + framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework) + + # get the shapes to be used to generate dummy inputs + input_shapes = {} + for input_name in DEFAULT_DUMMY_SHAPES.keys(): + input_shapes[input_name] = ( + kwargs_shapes[input_name] if input_name in kwargs_shapes else DEFAULT_DUMMY_SHAPES[input_name] + ) + + torch_dtype = None if fp16 is False else torch.float16 + + if task == "auto": + 
try: + task = TasksManager.infer_task_from_model(model_name_or_path) + except KeyError as e: + raise KeyError( + f"The task could not be automatically inferred. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}" + ) + except RequestsConnectionError as e: + raise RequestsConnectionError( + f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}" + ) + + model = TasksManager.get_model_from_task( + task, + model_name_or_path, + subfolder=subfolder, + revision=revision, + cache_dir=cache_dir, + use_auth_token=use_auth_token, + local_files_only=local_files_only, + force_download=force_download, + trust_remote_code=trust_remote_code, + framework=framework, + torch_dtype=torch_dtype, + device=device, + ) + + custom_architecture = False + is_stable_diffusion = "stable-diffusion" in task + model_type = "stable-diffusion" if is_stable_diffusion else model.config.model_type.replace("_", "-") + + if not is_stable_diffusion: + if model_type in TasksManager._UNSUPPORTED_CLI_MODEL_TYPE: + raise ValueError( + f"{model_type} is not supported yet. Only {TasksManager._SUPPORTED_CLI_MODEL_TYPE} are supported. " + f"If you want to support {model_type} please propose a PR or open up an issue." + ) + if model.config.model_type.replace("-", "_") not in TasksManager.get_supported_model_type_for_task( + task, exporter="onnx" + ): + custom_architecture = True + + if custom_architecture and custom_onnx_configs is None: + raise ValueError( + "Trying to export a model with a custom architecture, but no custom onnx configuration was passed as `custom_onnx_configs`. Please refer to https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#custom-export-of-transformers-models for an example on how to export custom models." + ) + + if custom_architecture and original_task == "auto": + raise ValueError( + f'Automatic task detection is not supported with custom architectures. Please specify the `task` argument. Suggestion: task="{task}" (or task="{task}-with-past" if the model is decoder-based and supports KV cache)' + ) + + if ( + not custom_architecture + and not is_stable_diffusion + and task + "-with-past" in TasksManager.get_supported_tasks_for_model_type(model_type, "onnx") + ): + if original_task == "auto": # Make -with-past the default if --task was not explicitely specified + task = task + "-with-past" + else: + logger.info( + f"The task `{task}` was manually specified, and past key values will not be reused in the decoding." + f" if needed, please pass `--task {task}-with-past` to export using the past key values." 
+ ) + + if original_task == "auto": + synonyms_for_task = sorted(TasksManager.synonyms_for_task(task)) + if synonyms_for_task: + synonyms_for_task = ", ".join(synonyms_for_task) + possible_synonyms = f" (possible synonyms are: {synonyms_for_task})" + else: + possible_synonyms = "" + logger.info(f"Automatic task detection to {task}{possible_synonyms}.") + onnx_config, models_and_onnx_configs = optimum_main._get_submodels_and_onnx_configs( + model=model, + task=task, + monolith=False, + custom_onnx_configs=custom_onnx_configs if custom_onnx_configs is not None else {}, + custom_architecture=custom_architecture, + fn_get_submodels=fn_get_submodels, + ) + + if not is_stable_diffusion: + needs_pad_token_id = ( + isinstance(onnx_config, OnnxConfigWithPast) + and getattr(model.config, "pad_token_id", None) is None + and task in ["text-classification"] + ) + if needs_pad_token_id: + if pad_token_id is not None: + model.config.pad_token_id = pad_token_id + else: + try: + tok = AutoTokenizer.from_pretrained(model_name_or_path) + model.config.pad_token_id = tok.pad_token_id + except Exception: + raise ValueError( + "Could not infer the pad token id, which is needed in this case, please provide it with the --pad_token_id argument" + ) + # Saving the model config and preprocessor as this is needed sometimes. + model.config.save_pretrained(output) + generation_config = getattr(model, "generation_config", None) + if generation_config is not None: + generation_config.save_pretrained(output) + maybe_save_preprocessors(model_name_or_path, output) + + if model.config.is_encoder_decoder and task.startswith("text-generation"): + raise ValueError( + f"model.config.is_encoder_decoder is True and task is `{task}`, which are incompatible. If the task was auto-inferred, please fill a bug report" + f"at https://github.com/huggingface/optimum, if --task was explicitely passed, make sure you selected the right task for the model," + f" referring to `optimum.exporters.tasks.TaskManager`'s `_TASKS_TO_AUTOMODELS`." + ) + + files_subpaths = None + else: + # save the subcomponent configuration + for model_name in models_and_onnx_configs: + subcomponent = models_and_onnx_configs[model_name][0] + if hasattr(subcomponent, "save_config"): + subcomponent.save_config(output / model_name) + elif hasattr(subcomponent, "config") and hasattr(subcomponent.config, "save_pretrained"): + subcomponent.config.save_pretrained(output / model_name) + + files_subpaths = [os.path.join(name_dir, OV_XML_FILE_NAME) for name_dir in models_and_onnx_configs] + + # Saving the additional components needed to perform inference. 
+ model.scheduler.save_pretrained(output.joinpath("scheduler")) + + feature_extractor = getattr(model, "feature_extractor", None) + if feature_extractor is not None: + feature_extractor.save_pretrained(output.joinpath("feature_extractor")) + + tokenizer = getattr(model, "tokenizer", None) + if tokenizer is not None: + tokenizer.save_pretrained(output.joinpath("tokenizer")) + + tokenizer_2 = getattr(model, "tokenizer_2", None) + if tokenizer_2 is not None: + tokenizer_2.save_pretrained(output.joinpath("tokenizer_2")) + + model.save_config(output) + + export_models( + models_and_onnx_configs=models_and_onnx_configs, + output_dir=output, + output_names=files_subpaths, + input_shapes=input_shapes, + device=device, + model_kwargs=model_kwargs, + ) diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py new file mode 100644 index 0000000000..86c4ed7725 --- /dev/null +++ b/optimum/exporters/openvino/convert.py @@ -0,0 +1,290 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import gc +import inspect +import logging +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union + +from transformers.utils import is_tf_available, is_torch_available + +from openvino.runtime import PartialShape, save_model +from openvino.runtime.utils.types import get_element_type +from openvino.tools.ovc import convert_model +from optimum.exporters.onnx.base import OnnxConfig +from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed, export_tensorflow +from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx +from optimum.utils import is_diffusers_available + +from ...intel.openvino.utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME +from .utils import ( + clear_class_registry, + flattenize_inputs, + get_input_shapes, + remove_none_from_dummy_inputs, +) + + +logger = logging.getLogger(__name__) + +if is_torch_available(): + import torch.nn as nn + from transformers.modeling_utils import PreTrainedModel + +if is_diffusers_available(): + from diffusers import ModelMixin + +if is_tf_available(): + from transformers.modeling_tf_utils import TFPreTrainedModel + + +def export( + model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], + config: OnnxConfig, + output: Path, + opset: Optional[int] = None, + device: str = "cpu", + input_shapes: Optional[Dict] = None, + model_kwargs: Optional[Dict[str, Any]] = None, +) -> Tuple[List[str], List[str]]: + """ + Exports a Pytorch or TensorFlow model to an OpenVINO Intermediate Representation. + + Args: + model ([`PreTrainedModel`] or [`TFPreTrainedModel`]): + The model to export. + config ([`~exporters.onnx.config.OnnxConfig`]): + The ONNX configuration associated with the exported model. + output (`Path`): + Directory to store the exported ONNX model. + opset (`Optional[int]`, defaults to `None`): + The version of the ONNX operator set to use. 
+ device (`str`, *optional*, defaults to `cpu`): + The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for + export on CUDA devices. + input_shapes (`Optional[Dict]`, defaults to `None`): + If specified, allows to use specific shapes for the example input provided to the ONNX exporter. + + Returns: + `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from + the ONNX configuration. + """ + if not (is_torch_available() or is_tf_available()): + raise ImportError( + "Cannot convert because neither PyTorch nor TensorFlow are installed. " + "Please install torch or tensorflow first." + ) + + if "diffusers" in str(model.__class__) and not is_diffusers_available(): + raise ImportError("The pip package `diffusers` is required to export stable diffusion models to ONNX.") + + if is_torch_available() and isinstance(model, nn.Module): + return export_pytorch( + model, config, opset, output, device=device, input_shapes=input_shapes, model_kwargs=model_kwargs + ) + + elif is_tf_available() and issubclass(type(model), TFPreTrainedModel): + output.parent.mkdir(parents=True, exist_ok=True) + if opset is None: + opset = config.DEFAULT_ONNX_OPSET + if device == "cuda": + raise RuntimeError("`tf2onnx` does not support export on CUDA device.") + if input_shapes is not None: + logger.info("`input_shapes` argument is not supported by the Tensorflow ONNX export and will be ignored.") + return export_tensorflow(model, config, opset, output) + + else: + raise RuntimeError( + "You either provided a PyTorch model with only TensorFlow installed, or a TensorFlow model with only PyTorch installed." + ) + + +def export_pytorch( + model: Union["PreTrainedModel", "ModelMixin"], + config: OnnxConfig, + opset: int, + output: Path, + device: str = "cpu", + input_shapes: Optional[Dict] = None, + model_kwargs: Optional[Dict[str, Any]] = None, +) -> Tuple[List[str], List[str]]: + """ + Exports a PyTorch model to an OpenVINO Intermediate Representation. + + Args: + model ([`PreTrainedModel`]): + The model to export. + config ([`~exporters.onnx.config.OnnxConfig`]): + The ONNX configuration associated with the exported model. + opset (`int`): + The version of the ONNX operator set to use. + output (`Path`): + Directory to store the exported ONNX model. + device (`str`, defaults to `"cpu"`): + The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for + export on CUDA devices. + input_shapes (`optional[Dict]`, defaults to `None`): + If specified, allows to use specific shapes for the example input provided to the ONNX exporter. + + Returns: + `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from + the ONNX configuration. 
+ """ + import torch + from torch.utils._pytree import tree_map + + logger.info(f"Using framework PyTorch: {torch.__version__}") + output = Path(output) + + with torch.no_grad(): + model.config.return_dict = True + model.eval() + + # Check if we need to override certain configuration item + if config.values_override is not None: + logger.info(f"Overriding {len(config.values_override)} configuration item(s)") + for override_config_key, override_config_value in config.values_override.items(): + logger.info(f"\t- {override_config_key} -> {override_config_value}") + setattr(model.config, override_config_key, override_config_value) + + if input_shapes is None: + input_shapes = {} # will use the defaults from DEFAULT_DUMMY_SHAPES + + # Check that inputs match, and order them properly + dummy_inputs = config.generate_dummy_inputs(framework="pt", **input_shapes) + device = torch.device(device) + if device.type == "cuda" and torch.cuda.is_available(): + model.to(device) + dummy_inputs = tree_map( + lambda value: value.to(device) if isinstance(value, torch.Tensor) else value, dummy_inputs + ) + check_dummy_inputs_are_allowed(model, dummy_inputs) + inputs = config.ordered_inputs(model) + input_names = list(inputs.keys()) + output_names = list(config.outputs.keys()) + if hasattr(model, "forward"): + sig = inspect.signature(model.forward) + else: + sig = inspect.signature(model.call) + + dummy_inputs, dict_inputs = remove_none_from_dummy_inputs(dummy_inputs) + input_info = get_input_shapes(dummy_inputs, inputs) + try: + patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs) + patched_forward = patcher.patched_forward + + @functools.wraps(patched_forward) + def ts_patched_forward(*args, **kwargs): + for i in range(len(dict_inputs)): + input_name = dict_inputs[i][0] + keys = dict_inputs[i][1] + tuple_input = kwargs[input_name] + input_dict = dict(zip(keys, tuple_input)) + kwargs[input_name] = input_dict + outputs = patched_forward(*args, **kwargs) + return tuple(outputs.values()) + + patcher.patched_forward = ts_patched_forward + with patcher: + ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) + except Exception: + orig_torch_onnx_export = torch.onnx.export + + torch.onnx.export = functools.partial(orig_torch_onnx_export, do_constant_folding=True) + model.config.torchscript = False + model.config.return_dict = True + onnx_output = ( + output.with_suffix(".onnx") + if not output.name != OV_XML_FILE_NAME + else output.parent / ONNX_WEIGHTS_NAME + ) + input_names, output_names = export_pytorch_to_onnx( + model, config, opset, onnx_output, device, input_shapes, model_kwargs + ) + torch.onnx.export = orig_torch_onnx_export + ov_model = convert_model(str(onnx_output)) + save_model( + ov_model, + output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output, + compress_to_fp16=False, + ) + return input_names, output_names, True + clear_class_registry() + ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} + ordered_input_names = list(inputs) + flatten_inputs = flattenize_inputs(ordered_dummy_inputs.values()) + ov_model.validate_nodes_and_infer_types() + for idx, out_tensor in enumerate(ov_model.outputs): + if idx < len(output_names): + out_tensor.get_tensor().set_names({output_names[idx]}) + + for idx, inp_tensor in enumerate(ov_model.inputs): + input_name = ordered_input_names[idx] + inp_tensor.get_tensor().set_names({input_name}) + inp_data = flatten_inputs[idx] + static_shape = 
PartialShape(inp_data.shape) + dims = inputs[input_name] + + for dim in dims: + static_shape[dim] = -1 + inp_tensor.get_node().set_partial_shape(static_shape) + inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) + ov_model.validate_nodes_and_infer_types() + save_model( + ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output, compress_to_fp16=False + ) + del model + gc.collect() + return input_names, output_names, False + + +def export_models( + models_and_onnx_configs: Dict[ + str, Tuple[Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], "OnnxConfig"] + ], + output_dir: Path, + opset: Optional[int] = None, + output_names: Optional[List[str]] = None, + device: str = "cpu", + input_shapes: Optional[Dict] = None, + model_kwargs: Optional[Dict[str, Any]] = None, +) -> Tuple[List[List[str]], List[List[str]]]: + outputs = [] + + if output_names is not None and len(output_names) != len(models_and_onnx_configs): + raise ValueError( + f"Provided custom names {output_names} for the export of {len(models_and_onnx_configs)} models. Please provide the same number of names as models to export." + ) + + for i, model_name in enumerate(models_and_onnx_configs.keys()): + submodel, sub_onnx_config = models_and_onnx_configs[model_name] + output_name = output_names[i] if output_names is not None else Path(model_name + ".xml") + output_path = output_dir / output_name + output_path.parent.mkdir(parents=True, exist_ok=True) + outputs.append( + export( + model=submodel, + config=sub_onnx_config, + output=output_path, + opset=opset, + device=device, + input_shapes=input_shapes, + model_kwargs=model_kwargs, + ) + ) + + outputs = list(map(list, zip(*outputs))) + return outputs diff --git a/optimum/exporters/openvino/utils.py b/optimum/exporters/openvino/utils.py new file mode 100644 index 0000000000..9b1867ba83 --- /dev/null +++ b/optimum/exporters/openvino/utils.py @@ -0,0 +1,81 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
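Note: the new `optimum/exporters/openvino/utils.py` below holds the helpers used by `export_pytorch` in `convert.py` above. The core idea of this "resolve dictionary as input" patch is that dict-valued dummy inputs cannot be handed to tracing directly, so `remove_none_from_dummy_inputs` flattens each dict into a tuple while remembering its keys, and `ts_patched_forward` rebuilds the dicts before calling the real forward. A rough standalone illustration of that round trip with toy data (not the helpers themselves):

```python
# Toy illustration of the dict flatten/rebuild round trip used by export_pytorch above.
import torch

dummy_inputs = {
    "input_ids": torch.ones(1, 8, dtype=torch.long),
    "encoder_outputs": {"last_hidden_state": torch.zeros(1, 8, 16)},  # dict-valued input
}

# Flatten: remember which inputs were dicts and replace each dict with a tuple of its values.
dict_inputs, flat_inputs = [], {}
for name, value in dummy_inputs.items():
    if isinstance(value, dict):
        dict_inputs.append((name, list(value.keys())))
        flat_inputs[name] = tuple(value.values())
    else:
        flat_inputs[name] = value

# Rebuild: inside the patched forward, turn the tuples back into dicts before calling the model.
def rebuild(kwargs):
    for name, keys in dict_inputs:
        kwargs[name] = dict(zip(keys, kwargs[name]))
    return kwargs

restored = rebuild(dict(flat_inputs))
assert isinstance(restored["encoder_outputs"], dict)
```

When any tuple, list, or dict input remains, `get_input_shapes` returns `None` so `convert_model` is left to infer shapes from the example input.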
+ +from transformers.utils import is_torch_available + +from openvino.runtime import PartialShape + + +if is_torch_available(): + import torch + import torch.nn as nn + + +def is_torch_model(model): + if not is_torch_available(): + return False + return isinstance(model, nn.Module) + + +def flattenize_inputs(inputs): + flatten_inputs = [] + for input_data in inputs: + if input_data is None: + continue + if isinstance(input_data, (list, tuple)): + flatten_inputs.extend(flattenize_inputs(input_data)) + else: + flatten_inputs.append(input_data) + return flatten_inputs + + +def remove_none_from_dummy_inputs(dummy_inputs): + def remove_none_from_list_tuple(item): + new_item = [i for i in item if i is not None] + return type(item)(new_item) + + upd_dummy = {} + dict_dummy = [] + for k, v in dummy_inputs.items(): + if v is None: + continue + if isinstance(v, dict): + dict_dummy.append((k, list(v.keys()))) + upd_dummy[k] = remove_none_from_list_tuple(tuple(v.values())) + continue + if isinstance(v, (tuple, list)): + upd_dummy[k] = remove_none_from_list_tuple(v) + continue + upd_dummy[k] = v + return upd_dummy, dict_dummy + + +def get_input_shapes(dummy_inputs, inputs): + input_info = [] + for input_name, data in dummy_inputs.items(): + if isinstance(data, (tuple, list, dict)): + return None + static_shape = PartialShape(data.shape) + if input_name in inputs: + dynamic_dims = inputs[input_name] + for dim in dynamic_dims: + static_shape[dim] = -1 + input_info.append((input_name, static_shape)) + return input_info + + +def clear_class_registry(): + torch._C._jit_clear_class_registry() + torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() + torch.jit._state._clear_class_state() diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index 4c99bef09f..fda0e9eb5a 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -148,7 +148,7 @@ def _save_pretrained(self, save_directory: Union[str, Path]): The directory where to save the model files. 
""" dst_path = os.path.join(save_directory, OV_XML_FILE_NAME) - openvino.runtime.serialize(self.model, dst_path) + openvino.save_model(self.model, dst_path, compress_to_fp16=False) @classmethod def _from_pretrained( @@ -199,7 +199,7 @@ def _from_pretrained( model_save_dir = model_id # Download the model from the hub else: - model_file_names = [file_name] + model_file_names = [file_name] if from_onnx else [] # If not ONNX then OpenVINO IR if not from_onnx: model_file_names.append(file_name.replace(".xml", ".bin")) diff --git a/optimum/intel/openvino/modeling_base_seq2seq.py b/optimum/intel/openvino/modeling_base_seq2seq.py index 5a5e195845..eca3f661a8 100644 --- a/optimum/intel/openvino/modeling_base_seq2seq.py +++ b/optimum/intel/openvino/modeling_base_seq2seq.py @@ -105,7 +105,7 @@ def _save_pretrained(self, save_directory: Union[str, Path]): for src_file, dst_file_name in zip(src_files, dst_file_names): dst_path = os.path.join(save_directory, dst_file_name) - openvino.runtime.serialize(src_file, dst_path) + openvino.save_model(src_file, dst_path, compress_to_fp16=False) @classmethod def _from_pretrained( diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index b5cbc2ba48..85d5f8230f 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -190,7 +190,7 @@ def _save_pretrained(self, save_directory: Union[str, Path]): """ model_to_save = self.model if self._pkv_precision == Type.f32 else self._original_model dst_path = os.path.join(save_directory, OV_XML_FILE_NAME) - openvino.runtime.serialize(model_to_save, dst_path) + openvino.save_model(model_to_save, dst_path, compress_to_fp16=False) @classmethod def _from_transformers( diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index c807f61b3f..21e8323394 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -156,7 +156,7 @@ def _save_pretrained(self, save_directory: Union[str, Path]): if ov_model is not None: dst_path = save_directory / dst_path / OV_XML_FILE_NAME dst_path.parent.mkdir(parents=True, exist_ok=True) - openvino.runtime.serialize(ov_model.model, dst_path) + openvino.save_model(ov_model.model, dst_path, compress_to_fp16=False) model_dir = ov_model.config.get("_name_or_path", None) or ov_model._model_dir / ov_model._model_name config_path = Path(model_dir) / ov_model.CONFIG_NAME if config_path.is_file(): diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index 388f2cdfdf..0fb8d9044d 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -414,7 +414,7 @@ def _quantize_torchmodel( @staticmethod def _save_pretrained(model: openvino.runtime.Model, output_path: str): compress_quantize_weights_transformation(model) - openvino.runtime.serialize(model, output_path) + openvino.save_model(model, output_path, compress_to_fp16=False) def _set_task(self): if self.task is None: diff --git a/optimum/intel/openvino/trainer.py b/optimum/intel/openvino/trainer.py index 22d402c80f..e09293739f 100644 --- a/optimum/intel/openvino/trainer.py +++ b/optimum/intel/openvino/trainer.py @@ -772,7 +772,7 @@ def _save(self, output_dir: Optional[str] = None, state_dict=None): compress_quantize_weights_transformation(ov_model) # Serialize IR xml and bin - save_model(ov_model, output_path) + save_model(ov_model, output_path, compress_to_fp16=False) def 
_get_compression_controller_by_cls( self, controller_cls: Type[PTCompressionAlgorithmController] From 989432c5eb39ef675672346958abcdbaf54fc6b3 Mon Sep 17 00:00:00 2001 From: eaidova Date: Sun, 13 Aug 2023 16:02:44 +0400 Subject: [PATCH 23/38] fix llama export in quantization flow --- optimum/exporters/openvino/convert.py | 70 +++++++++++++++------- optimum/intel/openvino/modeling_decoder.py | 11 ++-- optimum/intel/openvino/quantization.py | 43 +++++++------ optimum/intel/utils/modeling_utils.py | 10 ++++ 4 files changed, 87 insertions(+), 47 deletions(-) diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index 86c4ed7725..49b54346e5 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -59,6 +59,7 @@ def export( device: str = "cpu", input_shapes: Optional[Dict] = None, model_kwargs: Optional[Dict[str, Any]] = None, + from_onnx: bool = False, ) -> Tuple[List[str], List[str]]: """ Exports a Pytorch or TensorFlow model to an OpenVINO Intermediate Representation. @@ -93,7 +94,15 @@ def export( if is_torch_available() and isinstance(model, nn.Module): return export_pytorch( - model, config, opset, output, device=device, input_shapes=input_shapes, model_kwargs=model_kwargs + model, + config, + opset, + output, + device=device, + input_shapes=input_shapes, + model_kwargs=model_kwargs, + opset=opset, + from_onnx=from_onnx, ) elif is_tf_available() and issubclass(type(model), TFPreTrainedModel): @@ -112,6 +121,37 @@ def export( ) +def export_pytorch_via_onnx( + model: Union["PreTrainedModel", "ModelMixin"], + config: OnnxConfig, + opset: int, + output: Path, + device: str = "cpu", + input_shapes: Optional[Dict] = None, + model_kwargs: Optional[Dict[str, Any]] = None, +): + import torch + + orig_torch_onnx_export = torch.onnx.export + torch.onnx.export = functools.partial(orig_torch_onnx_export, do_constant_folding=False) + model.config.torchscript = False + model.config.return_dict = True + onnx_output = ( + output.with_suffix(".onnx") if not output.name != OV_XML_FILE_NAME else output.parent / ONNX_WEIGHTS_NAME + ) + input_names, output_names = export_pytorch_to_onnx( + model, config, opset, onnx_output, device, input_shapes, model_kwargs + ) + torch.onnx.export = orig_torch_onnx_export + ov_model = convert_model(str(onnx_output)) + save_model( + ov_model, + output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output, + compress_to_fp16=False, + ) + return input_names, output_names, True + + def export_pytorch( model: Union["PreTrainedModel", "ModelMixin"], config: OnnxConfig, @@ -120,6 +160,7 @@ def export_pytorch( device: str = "cpu", input_shapes: Optional[Dict] = None, model_kwargs: Optional[Dict[str, Any]] = None, + from_onnx: bool = False, ) -> Tuple[List[str], List[str]]: """ Exports a PyTorch model to an OpenVINO Intermediate Representation. 
@@ -148,6 +189,8 @@ def export_pytorch( logger.info(f"Using framework PyTorch: {torch.__version__}") output = Path(output) + if from_onnx: + return export_pytorch_via_onnx(model, config, opset, output, device, input_shapes, model_kwargs) with torch.no_grad(): model.config.return_dict = True @@ -200,28 +243,9 @@ def ts_patched_forward(*args, **kwargs): patcher.patched_forward = ts_patched_forward with patcher: ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) - except Exception: - orig_torch_onnx_export = torch.onnx.export - - torch.onnx.export = functools.partial(orig_torch_onnx_export, do_constant_folding=True) - model.config.torchscript = False - model.config.return_dict = True - onnx_output = ( - output.with_suffix(".onnx") - if not output.name != OV_XML_FILE_NAME - else output.parent / ONNX_WEIGHTS_NAME - ) - input_names, output_names = export_pytorch_to_onnx( - model, config, opset, onnx_output, device, input_shapes, model_kwargs - ) - torch.onnx.export = orig_torch_onnx_export - ov_model = convert_model(str(onnx_output)) - save_model( - ov_model, - output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output, - compress_to_fp16=False, - ) - return input_names, output_names, True + except Exception as ex: + logger.warning(f"Export model to OpenVINO directly failed with: \n{ex}.\nModel will be exported to ONNX") + return export_pytorch_via_onnx(model, config, opset, output, device, input_shapes, model_kwargs) clear_class_registry() ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} ordered_input_names = list(inputs) diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index 85d5f8230f..83a05c4d26 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -31,8 +31,12 @@ from optimum.utils import NormalizedConfigManager from ..utils.import_utils import is_transformers_version +<<<<<<< HEAD from ..utils.modeling_utils import _prepare_attn_mask, _prepare_decoder_attention_mask from .export import export, is_torch_model +======= +from ..utils.modeling_utils import patch_decoder_attention_mask +>>>>>>> fix llama export in quantization flow from .modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, STR_TO_OV_TYPE @@ -232,12 +236,7 @@ def _from_transformers( onnx_config = onnx_config_constructor(model.config, use_past=use_cache) # TODO : create ModelPatcher to patch each architecture - if config.model_type == "bloom": - model.transformer._prepare_attn_mask = _prepare_attn_mask - elif config.model_type == "llama": - model.model._prepare_decoder_attention_mask = _prepare_decoder_attention_mask - elif config.model_type in {"blenderbot-small", "blenderbot", "opt", "pegasus", "bart"}: - model.model.decoder._prepare_decoder_attention_mask = _prepare_decoder_attention_mask + model = patch_decoder_attention_mask(model) # Export the model to the ONNX format export(model=model, config=onnx_config, output=save_dir_path / ONNX_WEIGHTS_NAME) diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index 0fb8d9044d..f9c0524747 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -34,13 +34,12 @@ from transformers import DataCollator, PreTrainedModel, default_data_collator from transformers.pytorch_utils import Conv1D -from optimum.exporters.onnx import 
export as onnx_export from optimum.exporters.tasks import TasksManager from optimum.quantization_base import OptimumQuantizer from ..utils.constant import _TASK_ALIASES -from .configuration import INT8_WEIGHT_COMPRESSION_CONFIG, OVConfig -from .export import export +from ..utils.modeling_utils import patch_decoder_attention_mask +from .configuration import OVConfig from .modeling_base import OVBaseModel from .modeling_decoder import OVBaseDecoderModel from .utils import ( @@ -336,8 +335,9 @@ def _quantize_torchmodel( self._set_task() save_directory = Path(save_directory) save_directory.mkdir(parents=True, exist_ok=True) - file_name = file_name if file_name is not None else OV_XML_FILE_NAME - output_path = save_directory.joinpath(file_name) + ov_file_name = file_name if file_name is not None else OV_XML_FILE_NAME + onnx_file_name = Path(file_name).with_suffix(".onnx") if file_name is not None else ONNX_WEIGHTS_NAME + output_path = save_directory.joinpath(ov_file_name) output_path = output_path.with_suffix(".xml").as_posix() model_type = self.model.config.model_type.replace("_", "-") @@ -379,36 +379,43 @@ def _quantize_torchmodel( self.model.config.save_pretrained(save_directory) if task == "text-generation": + model = patch_decoder_attention_mask(model) onnx_config = onnx_config_class(model.config, use_past=model.config.use_cache) else: onnx_config = onnx_config_class(model.config) - model_path = save_directory / (ONNX_WEIGHTS_NAME if quantization_config.save_onnx_model else OV_XML_FILE_NAME) - if quantization_config.save_onnx_model: - # Export the model to the ONNX format - opset = min(onnx_config.DEFAULT_ONNX_OPSET, MAX_ONNX_OPSET) - opset = max(opset, MIN_ONNX_QDQ_OPSET) - onnx_export( - model=compressed_model, - config=onnx_config, - opset=opset, - output=model_path, - ) - + model_path = save_directory / onnx_file_name if quantization_config.save_onnx_model else ov_file_name + onnx_path = save_directory / onnx_file_name + opset = min(onnx_config.DEFAULT_ONNX_OPSET, MAX_ONNX_OPSET) + opset = max(opset, MIN_ONNX_QDQ_OPSET) + _, _, is_onnx = export( + model=compressed_model, + config=onnx_config, + output=model_path, + opset=opset, + from_onnx=quantization_config.save_onnx_model, + ) + if is_onnx: # Load and save the compressed model - model = core.read_model(model_path) + model = core.read_model(onnx_path) self._save_pretrained(model, output_path) +<<<<<<< HEAD else: _, _, is_onnx = export(model=compressed_model, config=onnx_config, output=output_path) if is_onnx: onnx_path = output_path.replace(".xml", ".onnx") model = core.read_model(onnx_path) self._save_pretrained(model, output_path) +======= + # if onnx conversion happens as fallback for pytorch conversion, remove onnx model + if not quantization_config.save_onnx_model: +>>>>>>> fix llama export in quantization flow os.remove(onnx_path) try: os.remove(f"{onnx_path}_data") except FileNotFoundError: pass + quantization_config.save_pretrained(save_directory) @staticmethod diff --git a/optimum/intel/utils/modeling_utils.py b/optimum/intel/utils/modeling_utils.py index c7be049990..924c65d10a 100644 --- a/optimum/intel/utils/modeling_utils.py +++ b/optimum/intel/utils/modeling_utils.py @@ -89,3 +89,13 @@ def _prepare_decoder_attention_mask(attention_mask, input_shape, inputs_embeds, ) return combined_attention_mask + + +def patch_decoder_attention_mask(model): + if model.config.model_type == "bloom": + model.transformer._prepare_attn_mask = _prepare_attn_mask + elif model.config.model_type == "llama": + 
model.model._prepare_decoder_attention_mask = _prepare_decoder_attention_mask + elif model.config.model_type in {"blenderbot-small", "blenderbot", "opt", "pegasus", "bart"}: + model.model.decoder._prepare_decoder_attention_mask = _prepare_decoder_attention_mask + return model From b88a92b24b71a6ebd24d231f9bd1bd4710af1d73 Mon Sep 17 00:00:00 2001 From: eaidova Date: Sun, 13 Aug 2023 17:36:24 +0400 Subject: [PATCH 24/38] rebase with fixes --- optimum/exporters/openvino/__init__.py | 5 + optimum/exporters/openvino/convert.py | 1 - optimum/intel/openvino/export.py | 673 ------------------ optimum/intel/openvino/modeling_base.py | 11 +- .../intel/openvino/modeling_base_seq2seq.py | 2 +- optimum/intel/openvino/modeling_decoder.py | 7 +- optimum/intel/openvino/modeling_diffusion.py | 2 +- optimum/intel/openvino/quantization.py | 21 +- setup.py | 2 +- 9 files changed, 21 insertions(+), 703 deletions(-) create mode 100644 optimum/exporters/openvino/__init__.py delete mode 100644 optimum/intel/openvino/export.py diff --git a/optimum/exporters/openvino/__init__.py b/optimum/exporters/openvino/__init__.py new file mode 100644 index 0000000000..9dc8b1833d --- /dev/null +++ b/optimum/exporters/openvino/__init__.py @@ -0,0 +1,5 @@ +from .__main__ import main_export +from .convert import export, export_models + + +__all__ = ["main_export", "export", "export_models"] diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index 49b54346e5..800d0742f4 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -101,7 +101,6 @@ def export( device=device, input_shapes=input_shapes, model_kwargs=model_kwargs, - opset=opset, from_onnx=from_onnx, ) diff --git a/optimum/intel/openvino/export.py b/optimum/intel/openvino/export.py deleted file mode 100644 index e82d3a66c6..0000000000 --- a/optimum/intel/openvino/export.py +++ /dev/null @@ -1,673 +0,0 @@ -import functools -import gc -import inspect -import logging -import os -from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Tuple, Union - -from openvino import PartialShape, convert_model, serialize -from openvino.runtime.utils.types import get_element_type -from requests.exceptions import ConnectionError as RequestsConnectionError -from transformers import AutoTokenizer -from transformers.utils import is_tf_available, is_torch_available -from transformers import AutoTokenizer -from transformers.utils import is_tf_available, is_torch_available - -from optimum.exporters import TasksManager -from optimum.exporters.onnx import __main__ -from optimum.exporters.onnx.base import OnnxConfig, OnnxConfigWithPast -from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed, export_tensorflow -from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx -from optimum.utils import DEFAULT_DUMMY_SHAPES, is_diffusers_available -from optimum.utils.save_utils import maybe_save_preprocessors - -from .utils import OV_XML_FILE_NAME - - -logger = logging.getLogger(__name__) - -if is_torch_available(): - import torch - import torch.nn as nn - from transformers.modeling_utils import PreTrainedModel - -if is_diffusers_available(): - from diffusers import ModelMixin - -if is_tf_available(): - from transformers.modeling_tf_utils import TFPreTrainedModel - -def is_torch_model(model): - if not is_torch_available(): - return False - return isinstance(model, nn.Module) - - -def export( - model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], 
- config: OnnxConfig, - output: Path, - opset: Optional[int] = None, - device: str = "cpu", - input_shapes: Optional[Dict] = None, - model_kwargs: Optional[Dict[str, Any]] = None, -) -> Tuple[List[str], List[str]]: - """ - Exports a Pytorch or TensorFlow model to an ONNX Intermediate Representation. - - Args: - model ([`PreTrainedModel`] or [`TFPreTrainedModel`]): - The model to export. - config ([`~exporters.onnx.config.OnnxConfig`]): - The ONNX configuration associated with the exported model. - output (`Path`): - Directory to store the exported ONNX model. - opset (`Optional[int]`, defaults to `None`): - The version of the ONNX operator set to use. - device (`str`, *optional*, defaults to `cpu`): - The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for - export on CUDA devices. - input_shapes (`Optional[Dict]`, defaults to `None`): - If specified, allows to use specific shapes for the example input provided to the ONNX exporter. - - Returns: - `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from - the ONNX configuration. - """ - if not (is_torch_available() or is_tf_available()): - raise ImportError( - "Cannot convert because neither PyTorch nor TensorFlow are installed. " - "Please install torch or tensorflow first." - ) - - if "diffusers" in str(model.__class__) and not is_diffusers_available(): - raise ImportError("The pip package `diffusers` is required to export stable diffusion models to ONNX.") - - if is_torch_available() and isinstance(model, nn.Module): - return export_pytorch( - model, config, opset, output, device=device, input_shapes=input_shapes, model_kwargs=model_kwargs - ) - - elif is_tf_available() and issubclass(type(model), TFPreTrainedModel): - output.parent.mkdir(parents=True, exist_ok=True) - if opset is None: - opset = config.DEFAULT_ONNX_OPSET - if device == "cuda": - raise RuntimeError("`tf2onnx` does not support export on CUDA device.") - if input_shapes is not None: - logger.info("`input_shapes` argument is not supported by the Tensorflow ONNX export and will be ignored.") - return export_tensorflow(model, config, opset, output) - - else: - raise RuntimeError( - "You either provided a PyTorch model with only TensorFlow installed, or a TensorFlow model with only PyTorch installed." - ) - - -def export_pytorch( - model: Union["PreTrainedModel", "ModelMixin"], - config: OnnxConfig, - opset: int, - output: Path, - device: str = "cpu", - input_shapes: Optional[Dict] = None, - model_kwargs: Optional[Dict[str, Any]] = None, -) -> Tuple[List[str], List[str]]: - """ - Exports a PyTorch model to an ONNX Intermediate Representation. - - Args: - model ([`PreTrainedModel`]): - The model to export. - config ([`~exporters.onnx.config.OnnxConfig`]): - The ONNX configuration associated with the exported model. - opset (`int`): - The version of the ONNX operator set to use. - output (`Path`): - Directory to store the exported ONNX model. - device (`str`, defaults to `"cpu"`): - The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for - export on CUDA devices. - input_shapes (`optional[Dict]`, defaults to `None`): - If specified, allows to use specific shapes for the example input provided to the ONNX exporter. - - Returns: - `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from - the ONNX configuration. 
- """ - import torch - from torch.utils._pytree import tree_map - - logger.info(f"Using framework PyTorch: {torch.__version__}") - output = Path(output) - - with torch.no_grad(): - model.config.return_dict = True - custom_patcher = type(config).patch_model_for_export != OnnxConfig.patch_model_for_export - model.config.torchscript = not custom_patcher - from torch.utils._pytree import tree_map - - logger.info(f"Using framework PyTorch: {torch.__version__}") - output = Path(output) - - with torch.no_grad(): - model.config.return_dict = True - custom_patcher = type(config).patch_model_for_export != OnnxConfig.patch_model_for_export - model.config.torchscript = not custom_patcher - model.eval() - - # Check if we need to override certain configuration item - if config.values_override is not None: - logger.info(f"Overriding {len(config.values_override)} configuration item(s)") - for override_config_key, override_config_value in config.values_override.items(): - logger.info(f"\t- {override_config_key} -> {override_config_value}") - setattr(model.config, override_config_key, override_config_value) - - if input_shapes is None: - input_shapes = {} # will use the defaults from DEFAULT_DUMMY_SHAPES - - # Check that inputs match, and order them properly - dummy_inputs = config.generate_dummy_inputs(framework="pt", **input_shapes) - device = torch.device(device) - if device.type == "cuda" and torch.cuda.is_available(): - model.to(device) - dummy_inputs = tree_map( - lambda value: value.to(device) if isinstance(value, torch.Tensor) else value, dummy_inputs - ) - check_dummy_inputs_are_allowed(model, dummy_inputs) - inputs = config.ordered_inputs(model) - input_names = list(inputs.keys()) - output_names = list(config.outputs.keys()) - if hasattr(model, "forward"): - sig = inspect.signature(model.forward) - else: - sig = inspect.signature(model.call) - dummy_inputs = remove_none_from_dummy_inputs(dummy_inputs) - input_info = get_input_shapes(dummy_inputs, inputs) - try: - if custom_patcher: - patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs) - patched_forward = patcher.patched_forward - - @functools.wraps(patched_forward) - def ts_patched_forward(*args, **kwargs): - outputs = patched_forward(*args, **kwargs) - return tuple(outputs.values()) - - patcher.patched_forward = ts_patched_forward - with patcher: - ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) - else: - ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) - except Exception: - model.config.torchscript = False - model.config.return_dict = True - onnx_output = output.with_suffix(".onnx") - input_names, output_names = export_pytorch_to_onnx( - model, config, opset, onnx_output, device, input_shapes, model_kwargs - ) - ov_model = convert_model(onnx_output) - serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) - return input_names, output_names, True - clear_class_registry() - ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} - ordered_input_names = list(inputs) - flatten_inputs = flattenize_inputs(ordered_dummy_inputs.values()) - for idx, out_tensor in enumerate(ov_model.outputs): - if idx < len(output_names): - out_tensor.get_tensor().set_names({output_names[idx]}) - - input_info = get_input_shapes(dummy_inputs, inputs) - try: - if custom_patcher: - patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs) - patched_forward = patcher.patched_forward - - 
@functools.wraps(patched_forward) - def ts_patched_forward(*args, **kwargs): - outputs = patched_forward(*args, **kwargs) - return tuple(outputs.values()) - - patcher.patched_forward = ts_patched_forward - with patcher: - ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) - else: - ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) - except Exception: - model.config.torchscript = False - model.config.return_dict = True - onnx_output = output.with_suffix(".onnx") - input_names, output_names = export_pytorch_to_onnx( - model, config, opset, onnx_output, device, input_shapes, model_kwargs - ) - ov_model = convert_model(onnx_output) - serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) - return input_names, output_names, True - - ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} - ordered_input_names = list(inputs) - flatten_inputs = flattenize_inputs(ordered_dummy_inputs.values()) - for idx, out_tensor in enumerate(ov_model.outputs): - if idx < len(output_names): - out_tensor.get_tensor().set_names({output_names[idx]}) - for idx, inp_tensor in enumerate(ov_model.inputs): - input_name = ordered_input_names[idx] - inp_tensor.get_tensor().set_names({input_name}) - inp_data = flatten_inputs[idx] - static_shape = PartialShape(inp_data.shape) - dims = inputs[input_name] - - for dim in dims: - static_shape[dim] = -1 - inp_tensor.get_node().set_partial_shape(static_shape) - inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) - ov_model.validate_nodes_and_infer_types() - serialize(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output) - del model - gc.collect() - return input_names, output_names, False - - -def clear_class_registry(): - torch._C._jit_clear_class_registry() - torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() - torch.jit._state._clear_class_state() - - -def export_models( - models_and_onnx_configs: Dict[ - str, Tuple[Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], "OnnxConfig"] - ], - output_dir: Path, - opset: Optional[int] = None, - output_names: Optional[List[str]] = None, - device: str = "cpu", - input_shapes: Optional[Dict] = None, - model_kwargs: Optional[Dict[str, Any]] = None, -) -> Tuple[List[List[str]], List[List[str]]]: - """ - Exports a Pytorch or TensorFlow encoder decoder model to an ONNX Intermediate Representation. - The following method exports the encoder and decoder components of the model as separate - ONNX files. - - Args: - models_and_onnx_configs (`Dict[str, Tuple[Union[`PreTrainedModel`, `TFPreTrainedModel`], `OnnxConfig`]]): - A dictionnary containing the models to export and their corresponding onnx configs. - output_dir (`Path`): - Output directory to store the exported ONNX models. - opset (`Optional[int]`, defaults to `None`): - The version of the ONNX operator set to use. - output_names (`Optional[List[str]]`, defaults to `None`): - The names to use for the exported ONNX files. The order must be the same as the order of submodels in the ordered dict `models_and_onnx_configs`. - If None, will use the keys from `models_and_onnx_configs` as names. - device (`str`, defaults to `"cpu"`): - The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for - export on CUDA devices. 
- input_shapes (`Optional[Dict]`, defaults to `None`): - If specified, allows to use specific shapes for the example input provided to the ONNX exporter. - Returns: - `Tuple[List[List[str]], List[List[str]]]`: A tuple with an ordered list of the model's inputs, and the named - inputs from the ONNX configuration. - """ - outputs = [] - - if output_names is not None and len(output_names) != len(models_and_onnx_configs): - raise ValueError( - f"Provided custom names {output_names} for the export of {len(models_and_onnx_configs)} models. Please provide the same number of names as models to export." - ) - - for i, model_name in enumerate(models_and_onnx_configs.keys()): - submodel, sub_onnx_config = models_and_onnx_configs[model_name] - output_name = output_names[i] if output_names is not None else Path(model_name + ".xml") - output_path = output_dir / output_name - output_path.parent.mkdir(parents=True, exist_ok=True) - outputs.append( - export( - model=submodel, - config=sub_onnx_config, - output=output_path, - opset=opset, - device=device, - input_shapes=input_shapes, - model_kwargs=model_kwargs, - ) - ) - - outputs = list(map(list, zip(*outputs))) - return outputs - - -def flattenize_inputs(inputs): - flatten_inputs = [] - for input_data in inputs: - if input_data is None: - continue - if isinstance(input_data, (list, tuple)): - flatten_inputs.extend(flattenize_inputs(input_data)) - else: - flatten_inputs.append(input_data) - return flatten_inputs - - -def remove_none_from_dummy_inputs(dummy_inputs): - def remove_none_from_list_tuple(item): - new_item = [i for i in item if i is not None] - return type(item)(new_item) - - upd_dummy = {} - for k, v in dummy_inputs.items(): - if v is None: - continue - if isinstance(v, dict): - for kk, vv in v.items(): - upd_dummy[kk] = vv - continue - if isinstance(v, (tuple, list)): - upd_dummy[k] = remove_none_from_list_tuple(v) - continue - upd_dummy[k] = v - return upd_dummy - - -def get_input_shapes(dummy_inputs, inputs): - input_info = [] - for input_name, data in dummy_inputs.items(): - if isinstance(data, (tuple, list, dict)): - return None - static_shape = PartialShape(data.shape) - if input_name in inputs: - dynamic_dims = inputs[input_name] - for dim in dynamic_dims: - static_shape[dim] = -1 - input_info.append((input_name, static_shape)) - return input_info - - -def main_export( - model_name_or_path: str, - output: Union[str, Path], - task: str = "auto", - device: str = "cpu", - fp16: Optional[bool] = False, - monolith: bool = False, - framework: Optional[str] = None, - cache_dir: Optional[str] = None, - trust_remote_code: bool = False, - pad_token_id: Optional[int] = None, - subfolder: str = "", - revision: str = "main", - force_download: bool = False, - local_files_only: bool = False, - use_auth_token: Optional[Union[bool, str]] = None, - model_kwargs: Optional[Dict[str, Any]] = None, - custom_onnx_configs: Optional[Dict[str, "OnnxConfig"]] = None, - fn_get_submodels: Optional[Callable] = None, - **kwargs_shapes, -): - """ - Full-suite ONNX export. - - Args: - > Required parameters - - model_name_or_path (`str`): - Model ID on huggingface.co or path on disk to the model repository to export. - output (`Union[str, Path]`): - Path indicating the directory where to store the generated ONNX model. - - > Optional parameters - - task (`Optional[str]`, defaults to `None`): - The task to export the model for. If not specified, the task will be auto-inferred based on the model. 
For decoder models, - use `xxx-with-past` to export the model using past key values in the decoder. - opset (`Optional[int]`, defaults to `None`): - If specified, ONNX opset version to export the model with. Otherwise, the default opset for the given model architecture - will be used. - device (`str`, defaults to `"cpu"`): - The device to use to do the export. Defaults to "cpu". - fp16 (`Optional[bool]`, defaults to `"False"`): - Use half precision during the export. PyTorch-only, requires `device="cuda"`. - optimize (`Optional[str]`, defaults to `None`): - Allows to run ONNX Runtime optimizations directly during the export. Some of these optimizations are specific to - ONNX Runtime, and the resulting ONNX will not be usable with other runtime as OpenVINO or TensorRT. - Available options: `"O1", "O2", "O3", "O4"`. Reference: [`~optimum.onnxruntime.AutoOptimizationConfig`] - monolith (`bool`, defaults to `False`): - Forces to export the model as a single ONNX file. - no_post_process (`bool`, defaults to `False`): - Allows to disable any post-processing done by default on the exported ONNX models. - framework (`Optional[str]`, defaults to `None`): - The framework to use for the ONNX export (`"pt"` or `"tf"`). If not provided, will attempt to automatically detect - the framework for the checkpoint. - atol (`Optional[float]`, defaults to `None`): - If specified, the absolute difference tolerance when validating the model. Otherwise, the default atol for the model will be used. - cache_dir (`Optional[str]`, defaults to `None`): - Path indicating where to store cache. The default Hugging Face cache path will be used by default. - trust_remote_code (`bool`, defaults to `False`): - Allows to use custom code for the modeling hosted in the model repository. This option should only be set for repositories - you trust and in which you have read the code, as it will execute on your local machine arbitrary code present in the - model repository. - pad_token_id (`Optional[int]`, defaults to `None`): - This is needed by some models, for some tasks. If not provided, will attempt to use the tokenizer to guess it. - subfolder (`str`, defaults to `""`): - In case the relevant files are located inside a subfolder of the model repo either locally or on huggingface.co, you can - specify the folder name here. - revision (`str`, defaults to `"main"`): - Revision is the specific model version to use. It can be a branch name, a tag name, or a commit id. - force_download (`bool`, defaults to `False`): - Whether or not to force the (re-)download of the model weights and configuration files, overriding the - cached versions if they exist. - local_files_only (`Optional[bool]`, defaults to `False`): - Whether or not to only look at local files (i.e., do not try to download the model). - use_auth_token (`Optional[str]`, defaults to `None`): - The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated - when running `transformers-cli login` (stored in `~/.huggingface`). - model_kwargs (`Optional[Dict[str, Any]]`, defaults to `None`): - Experimental usage: keyword arguments to pass to the model during - the export. This argument should be used along the `custom_onnx_configs` argument - in case, for example, the model inputs/outputs are changed (for example, if - `model_kwargs={"output_attentions": True}` is passed). - custom_onnx_configs (`Optional[Dict[str, OnnxConfig]]`, defaults to `None`): - Experimental usage: override the default ONNX config used for the given model. 
This argument may be useful for advanced users that desire a finer-grained control on the export. An example is available [here](https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model). - fn_get_submodels (`Optional[Callable]`, defaults to `None`): - Experimental usage: Override the default submodels that are used at the export. This is - especially useful when exporting a custom architecture that needs to split the ONNX (e.g. encoder-decoder). If unspecified with custom models, optimum will try to use the default submodels used for the given task, with no guarantee of success. - use_subprocess (`bool`): - Do the ONNX exported model validation in subprocesses. This is especially useful when - exporting on CUDA device, where ORT does not release memory at inference session - destruction. When set to `True`, the `main_export` call should be guarded in - `if __name__ == "__main__":` block. - **kwargs_shapes (`Dict`): - Shapes to use during inference. This argument allows to override the default shapes used during the ONNX export. - - Example usage: - ```python - >>> from optimum.exporters.onnx import main_export - - >>> main_export("gpt2", output="gpt2_onnx/") - ``` - """ - if (framework == "tf" and fp16 is True) or not is_torch_available(): - raise ValueError("The --fp16 option is supported only for PyTorch.") - - if fp16 is True and device == "cpu": - raise ValueError( - "The --fp16 option is supported only when exporting on GPU. Please pass the option `--device cuda`." - ) - - output = Path(output) - if not output.exists(): - output.mkdir(parents=True) - original_task = task - task = TasksManager.map_from_synonym(task) - - framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework) - - # get the shapes to be used to generate dummy inputs - input_shapes = {} - for input_name in DEFAULT_DUMMY_SHAPES.keys(): - input_shapes[input_name] = ( - kwargs_shapes[input_name] if input_name in kwargs_shapes else DEFAULT_DUMMY_SHAPES[input_name] - ) - - torch_dtype = None if fp16 is False else torch.float16 - - if task == "auto": - try: - task = TasksManager.infer_task_from_model(model_name_or_path) - except KeyError as e: - raise KeyError( - f"The task could not be automatically inferred. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}" - ) - except RequestsConnectionError as e: - raise RequestsConnectionError( - f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}" - ) - - model = TasksManager.get_model_from_task( - task, - model_name_or_path, - subfolder=subfolder, - revision=revision, - cache_dir=cache_dir, - use_auth_token=use_auth_token, - local_files_only=local_files_only, - force_download=force_download, - trust_remote_code=trust_remote_code, - framework=framework, - torch_dtype=torch_dtype, - device=device, - ) - - custom_architecture = False - is_stable_diffusion = "stable-diffusion" in task - model_type = "stable-diffusion" if is_stable_diffusion else model.config.model_type.replace("_", "-") - - if not is_stable_diffusion: - if model_type in TasksManager._UNSUPPORTED_CLI_MODEL_TYPE: - raise ValueError( - f"{model_type} is not supported yet. Only {TasksManager._SUPPORTED_CLI_MODEL_TYPE} are supported. 
" - f"If you want to support {model_type} please propose a PR or open up an issue." - ) - if model.config.model_type.replace("-", "_") not in TasksManager.get_supported_model_type_for_task( - task, exporter="onnx" - ): - custom_architecture = True - - # TODO: support onnx_config.py in the model repo - if custom_architecture and custom_onnx_configs is None: - raise ValueError( - "Trying to export a model with a custom architecture, but no custom onnx configuration was passed as `custom_onnx_configs`. Please refer to https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#custom-export-of-transformers-models for an example on how to export custom models." - ) - - if custom_architecture and original_task == "auto": - raise ValueError( - f'Automatic task detection is not supported with custom architectures. Please specify the `task` argument. Suggestion: task="{task}" (or task="{task}-with-past" if the model is decoder-based and supports KV cache)' - ) - - if ( - not custom_architecture - and not is_stable_diffusion - and task + "-with-past" in TasksManager.get_supported_tasks_for_model_type(model_type, "onnx") - ): - if original_task == "auto": # Make -with-past the default if --task was not explicitely specified - task = task + "-with-past" - else: - logger.info( - f"The task `{task}` was manually specified, and past key values will not be reused in the decoding." - f" if needed, please pass `--task {task}-with-past` to export using the past key values." - ) - - if task.endswith("-with-past") and monolith is True: - task_non_past = task.replace("-with-past", "") - raise ValueError( - f"The task {task} is not compatible with the --monolith argument. Please either use" - f" `--task {task_non_past} --monolith`, or `--task {task}` without the monolith argument." - ) - - if original_task == "auto": - synonyms_for_task = sorted(TasksManager.synonyms_for_task(task)) - if synonyms_for_task: - synonyms_for_task = ", ".join(synonyms_for_task) - possible_synonyms = f" (possible synonyms are: {synonyms_for_task})" - else: - possible_synonyms = "" - logger.info(f"Automatic task detection to {task}{possible_synonyms}.") - - onnx_config, models_and_onnx_configs = __main__._get_submodels_and_onnx_configs( - model=model, - task=task, - monolith=monolith, - custom_onnx_configs=custom_onnx_configs if custom_onnx_configs is not None else {}, - custom_architecture=custom_architecture, - fn_get_submodels=fn_get_submodels, - ) - - if not is_stable_diffusion: - needs_pad_token_id = ( - isinstance(onnx_config, OnnxConfigWithPast) - and getattr(model.config, "pad_token_id", None) is None - and task in ["text-classification"] - ) - if needs_pad_token_id: - if pad_token_id is not None: - model.config.pad_token_id = pad_token_id - else: - try: - tok = AutoTokenizer.from_pretrained(model_name_or_path) - model.config.pad_token_id = tok.pad_token_id - except Exception: - raise ValueError( - "Could not infer the pad token id, which is needed in this case, please provide it with the --pad_token_id argument" - ) - # Saving the model config and preprocessor as this is needed sometimes. 
- model.config.save_pretrained(output) - generation_config = getattr(model, "generation_config", None) - if generation_config is not None: - generation_config.save_pretrained(output) - maybe_save_preprocessors(model_name_or_path, output) - - if model.config.is_encoder_decoder and task.startswith("text-generation"): - raise ValueError( - f"model.config.is_encoder_decoder is True and task is `{task}`, which are incompatible. If the task was auto-inferred, please fill a bug report" - f"at https://github.com/huggingface/optimum, if --task was explicitely passed, make sure you selected the right task for the model," - f" referring to `optimum.exporters.tasks.TaskManager`'s `_TASKS_TO_AUTOMODELS`." - ) - - files_subpaths = None - else: - # save the subcomponent configuration - for model_name in models_and_onnx_configs: - subcomponent = models_and_onnx_configs[model_name][0] - if hasattr(subcomponent, "save_config"): - subcomponent.save_config(output / model_name) - elif hasattr(subcomponent, "config") and hasattr(subcomponent.config, "save_pretrained"): - subcomponent.config.save_pretrained(output / model_name) - - files_subpaths = [os.path.join(name_dir, OV_XML_FILE_NAME) for name_dir in models_and_onnx_configs] - - # Saving the additional components needed to perform inference. - model.scheduler.save_pretrained(output.joinpath("scheduler")) - - feature_extractor = getattr(model, "feature_extractor", None) - if feature_extractor is not None: - feature_extractor.save_pretrained(output.joinpath("feature_extractor")) - - tokenizer = getattr(model, "tokenizer", None) - if tokenizer is not None: - tokenizer.save_pretrained(output.joinpath("tokenizer")) - - tokenizer_2 = getattr(model, "tokenizer_2", None) - if tokenizer_2 is not None: - tokenizer_2.save_pretrained(output.joinpath("tokenizer_2")) - - model.save_config(output) - - export_models( - models_and_onnx_configs=models_and_onnx_configs, - output_dir=output, - output_names=files_subpaths, - input_shapes=input_shapes, - device=device, - model_kwargs=model_kwargs, - ) diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index fda0e9eb5a..b06670dffa 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -29,8 +29,9 @@ from optimum.exporters.tasks import TasksManager from optimum.modeling_base import OptimizedModel +from ...exporters.openvino import export +from ...exporters.openvino.utils import is_torch_model from ..utils.import_utils import is_transformers_version -from .export import export, is_torch_model from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME @@ -129,10 +130,7 @@ def fix_op_names_duplicates(model: openvino.runtime.Model): if isinstance(file_name, str): file_name = Path(file_name) - bin_file_name = file_name.with_suffix(".bin") if file_name.suffix == ".xml" else None - model = ( - core.read_model(file_name, bin_file_name) if not file_name.suffix == ".onnx" else convert_model(file_name) - ) + model = core.read_model(file_name) if not file_name.suffix == ".onnx" else convert_model(file_name) if file_name.suffix == ".onnx": model = fix_op_names_duplicates(model) # should be called during model conversion to IR @@ -199,8 +197,9 @@ def _from_pretrained( model_save_dir = model_id # Download the model from the hub else: - model_file_names = [file_name] if from_onnx else [] + model_file_names = [file_name] # If not ONNX then OpenVINO IR + if not from_onnx: model_file_names.append(file_name.replace(".xml", ".bin")) file_names = 
[] diff --git a/optimum/intel/openvino/modeling_base_seq2seq.py b/optimum/intel/openvino/modeling_base_seq2seq.py index eca3f661a8..f8e09b2c91 100644 --- a/optimum/intel/openvino/modeling_base_seq2seq.py +++ b/optimum/intel/openvino/modeling_base_seq2seq.py @@ -27,8 +27,8 @@ from optimum.exporters import TasksManager from optimum.exporters.onnx import get_encoder_decoder_models_for_export +from ...exporters.openvino import export_models from ..utils.import_utils import is_transformers_version -from .export import export_models from .modeling_base import OVBaseModel from .utils import ( ONNX_DECODER_NAME, diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index 83a05c4d26..966bec52f4 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -30,13 +30,10 @@ from optimum.exporters import TasksManager from optimum.utils import NormalizedConfigManager +from ...exporters.openvino import export +from ...exporters.openvino.utils import is_torch_model from ..utils.import_utils import is_transformers_version -<<<<<<< HEAD -from ..utils.modeling_utils import _prepare_attn_mask, _prepare_decoder_attention_mask -from .export import export, is_torch_model -======= from ..utils.modeling_utils import patch_decoder_attention_mask ->>>>>>> fix llama export in quantization flow from .modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, STR_TO_OV_TYPE diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index 21e8323394..8de9ead5f5 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -49,8 +49,8 @@ DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, ) +from ...exporters.openvino import main_export from .loaders import OVTextualInversionLoaderMixin -from .export import main_export from .modeling_base import OVBaseModel from .utils import ONNX_WEIGHTS_NAME, OV_TO_NP_TYPE, OV_XML_FILE_NAME diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index f9c0524747..63083c6d89 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -30,13 +30,14 @@ from nncf.torch.initialization import PTInitializingDataLoader from openvino._offline_transformations import compress_quantize_weights_transformation from openvino.runtime import Core, Tensor -from torch.utils.data import DataLoader, RandomSampler, TensorDataset +from torch.utils.data import DataLoader, RandomSampler from transformers import DataCollator, PreTrainedModel, default_data_collator from transformers.pytorch_utils import Conv1D from optimum.exporters.tasks import TasksManager from optimum.quantization_base import OptimumQuantizer +from ...exporters.openvino import export from ..utils.constant import _TASK_ALIASES from ..utils.modeling_utils import patch_decoder_attention_mask from .configuration import OVConfig @@ -353,7 +354,7 @@ def _quantize_torchmodel( "No configuration describing the quantization process was provided, a default OVConfig will be generated." 
) quantization_config = OVConfig() - + if weights_only: compressed_model = compress_weights(self.model) self.model = compressed_model @@ -377,19 +378,18 @@ def _quantize_torchmodel( task = self.task model = self.model self.model.config.save_pretrained(save_directory) - + model = patch_decoder_attention_mask(model) if task == "text-generation": - model = patch_decoder_attention_mask(model) onnx_config = onnx_config_class(model.config, use_past=model.config.use_cache) else: onnx_config = onnx_config_class(model.config) - model_path = save_directory / onnx_file_name if quantization_config.save_onnx_model else ov_file_name + model_path = save_directory / (onnx_file_name if quantization_config.save_onnx_model else ov_file_name) onnx_path = save_directory / onnx_file_name opset = min(onnx_config.DEFAULT_ONNX_OPSET, MAX_ONNX_OPSET) opset = max(opset, MIN_ONNX_QDQ_OPSET) _, _, is_onnx = export( - model=compressed_model, + model=model, config=onnx_config, output=model_path, opset=opset, @@ -399,17 +399,8 @@ def _quantize_torchmodel( # Load and save the compressed model model = core.read_model(onnx_path) self._save_pretrained(model, output_path) -<<<<<<< HEAD - else: - _, _, is_onnx = export(model=compressed_model, config=onnx_config, output=output_path) - if is_onnx: - onnx_path = output_path.replace(".xml", ".onnx") - model = core.read_model(onnx_path) - self._save_pretrained(model, output_path) -======= # if onnx conversion happens as fallback for pytorch conversion, remove onnx model if not quantization_config.save_onnx_model: ->>>>>>> fix llama export in quantization flow os.remove(onnx_path) try: os.remove(f"{onnx_path}_data") diff --git a/setup.py b/setup.py index 020d4e8826..2dd7510e96 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ "onnxruntime<1.15.0", ], "openvino": ["openvino==2023.1.0.dev20230728", "onnx", "onnxruntime"], - "nncf": ["git+https://github.com/openvinotoolkit/nncf.git"], + "nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.git"], "ipex": ["transformers<4.32.0", "intel-extension-for-pytorch", "onnx"], "diffusers": ["diffusers", "invisible-watermark>=0.2.0"], "quality": QUALITY_REQUIRE, From 62634d1aeafa54e2c47014e5505b9a129f9b4651 Mon Sep 17 00:00:00 2001 From: eaidova Date: Fri, 18 Aug 2023 13:50:19 +0400 Subject: [PATCH 25/38] update prerelease package --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2dd7510e96..4812ede91a 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ "onnx", "onnxruntime<1.15.0", ], - "openvino": ["openvino==2023.1.0.dev20230728", "onnx", "onnxruntime"], + "openvino": ["openvino==2023.1.0.dev20230811", "onnx", "onnxruntime"], "nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.git"], "ipex": ["transformers<4.32.0", "intel-extension-for-pytorch", "onnx"], "diffusers": ["diffusers", "invisible-watermark>=0.2.0"], From c54010fdf79894fdde36a2f075b7ea6d00ab598e Mon Sep 17 00:00:00 2001 From: eaidova Date: Fri, 18 Aug 2023 14:27:41 +0400 Subject: [PATCH 26/38] fix onnx name issues --- optimum/exporters/openvino/convert.py | 44 ++++++++++++++------------ optimum/intel/openvino/quantization.py | 7 ++-- tests/openvino/test_quantization.py | 4 +-- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index 800d0742f4..eb8f20f6c9 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -29,7 
+29,7 @@ from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx from optimum.utils import is_diffusers_available -from ...intel.openvino.utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME +from ...intel.openvino.utils import OV_XML_FILE_NAME from .utils import ( clear_class_registry, flattenize_inputs, @@ -135,9 +135,7 @@ def export_pytorch_via_onnx( torch.onnx.export = functools.partial(orig_torch_onnx_export, do_constant_folding=False) model.config.torchscript = False model.config.return_dict = True - onnx_output = ( - output.with_suffix(".onnx") if not output.name != OV_XML_FILE_NAME else output.parent / ONNX_WEIGHTS_NAME - ) + onnx_output = output.with_suffix(".onnx") input_names, output_names = export_pytorch_to_onnx( model, config, opset, onnx_output, device, input_shapes, model_kwargs ) @@ -192,6 +190,7 @@ def export_pytorch( return export_pytorch_via_onnx(model, config, opset, output, device, input_shapes, model_kwargs) with torch.no_grad(): + model.config.torchscript = False model.config.return_dict = True model.eval() @@ -224,23 +223,28 @@ def export_pytorch( dummy_inputs, dict_inputs = remove_none_from_dummy_inputs(dummy_inputs) input_info = get_input_shapes(dummy_inputs, inputs) + custom_patcher = type(config).patch_model_for_export != OnnxConfig.patch_model_for_export try: - patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs) - patched_forward = patcher.patched_forward - - @functools.wraps(patched_forward) - def ts_patched_forward(*args, **kwargs): - for i in range(len(dict_inputs)): - input_name = dict_inputs[i][0] - keys = dict_inputs[i][1] - tuple_input = kwargs[input_name] - input_dict = dict(zip(keys, tuple_input)) - kwargs[input_name] = input_dict - outputs = patched_forward(*args, **kwargs) - return tuple(outputs.values()) - - patcher.patched_forward = ts_patched_forward - with patcher: + if custom_patcher or dict_inputs: + patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs) + patched_forward = patcher.patched_forward + + @functools.wraps(patched_forward) + def ts_patched_forward(*args, **kwargs): + for i in range(len(dict_inputs)): + input_name = dict_inputs[i][0] + keys = dict_inputs[i][1] + tuple_input = kwargs[input_name] + input_dict = dict(zip(keys, tuple_input)) + kwargs[input_name] = input_dict + outputs = patched_forward(*args, **kwargs) + return tuple(outputs.values()) + + patcher.patched_forward = ts_patched_forward + with patcher: + ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) + else: + model.config.torchscript = True ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) except Exception as ex: logger.warning(f"Export model to OpenVINO directly failed with: \n{ex}.\nModel will be exported to ONNX") diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index 63083c6d89..708be320d4 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -337,7 +337,6 @@ def _quantize_torchmodel( save_directory = Path(save_directory) save_directory.mkdir(parents=True, exist_ok=True) ov_file_name = file_name if file_name is not None else OV_XML_FILE_NAME - onnx_file_name = Path(file_name).with_suffix(".onnx") if file_name is not None else ONNX_WEIGHTS_NAME output_path = save_directory.joinpath(ov_file_name) output_path = output_path.with_suffix(".xml").as_posix() @@ -354,7 +353,11 @@ def _quantize_torchmodel( "No configuration describing the quantization process was 
provided, a default OVConfig will be generated." ) quantization_config = OVConfig() - + onnx_file_name = ( + ONNX_WEIGHTS_NAME + if file_name is None and quantization_config.save_onnx_model + else Path(ov_file_name).with_suffix(".onnx") + ) if weights_only: compressed_model = compress_weights(self.model) self.model = compressed_model diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 51dfe98507..369ad0f836 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -64,8 +64,8 @@ def get_num_quantized_nodes(ov_model): class OVQuantizerTest(unittest.TestCase): # TODO : add models SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = ( - (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 32, 32), - (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 41, 21), + (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 32, 35), + (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 41, 22), ) @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS) From 2e003c15421abc7572a2f1c82643aa478d3c3e1d Mon Sep 17 00:00:00 2001 From: eaidova Date: Mon, 21 Aug 2023 11:05:13 +0400 Subject: [PATCH 27/38] experiments with tests --- .github/workflows/test_openvino.yml | 6 ++- tests/openvino/test_modeling.py | 72 +++++++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml index cb58f412a6..80ab12c2f0 100644 --- a/.github/workflows/test_openvino.yml +++ b/.github/workflows/test_openvino.yml @@ -35,4 +35,8 @@ jobs: pip install .[openvino,nncf,tests,diffusers] - name: Test with Pytest run: | - pytest tests/openvino/ --ignore test_modeling_basic + pytest tests/openvino/test_modeling.py + pytest tests/openvino/test_quantization.py + pytest tests/openvino/test_stable_diffusion.py + pytest tests/openvino/test_training_examples.py + pytest tests/openvino/test_training.py diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index 2ffbbd6fba..c2d54893bc 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -117,6 +117,9 @@ def test_load_from_hub_and_save_model(self): outputs = model(**tokens) self.assertTrue(torch.equal(loaded_model_outputs.logits, outputs.logits)) + del loaded_model + del model + gc.collect() def test_load_from_hub_and_save_decoder_model(self): tokenizer = AutoTokenizer.from_pretrained(self.OV_DECODER_MODEL_ID) @@ -134,6 +137,9 @@ def test_load_from_hub_and_save_decoder_model(self): outputs = model(**tokens) self.assertTrue(torch.equal(loaded_model_outputs.logits, outputs.logits)) + del loaded_model + del model + gc.collect() def test_load_from_hub_and_save_seq2seq_model(self): tokenizer = AutoTokenizer.from_pretrained(self.OV_SEQ2SEQ_MODEL_ID) @@ -153,6 +159,9 @@ def test_load_from_hub_and_save_seq2seq_model(self): outputs = model.generate(**tokens) self.assertTrue(torch.equal(loaded_model_outputs, outputs)) + del loaded_model + del model + gc.collect() @require_diffusers def test_load_from_hub_and_save_stable_diffusion_model(self): @@ -186,6 +195,8 @@ def test_load_from_hub_and_save_stable_diffusion_model(self): np.random.seed(0) outputs = pipeline(**inputs).images self.assertTrue(np.array_equal(pipeline_outputs, outputs)) + del pipeline + gc.collect() class OVModelForSequenceClassificationIntegrationTest(unittest.TestCase): @@ -228,6 +239,9 @@ def 
test_compare_to_transformers(self, model_arch): self.assertIsInstance(ov_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type]) # Compare tensor outputs self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-4)) + del transformers_model + del ov_model + gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) def test_pipeline(self, model_arch): @@ -258,6 +272,7 @@ def test_pipeline(self, model_arch): self.assertGreaterEqual(outputs[0]["score"], 0.0) self.assertIsInstance(outputs[0]["label"], str) del model + del pipe gc.collect() @@ -327,6 +342,10 @@ def test_metric(self): ov_metric = task_evaluator.compute(model_or_pipeline=ov_pipe, data=data, metric="squad") self.assertEqual(ov_metric["exact_match"], transformers_metric["exact_match"]) self.assertEqual(ov_metric["f1"], transformers_metric["f1"]) + del transformers_pipe + del transformers_model + del ov_pipe + del ov_model gc.collect() @@ -356,6 +375,8 @@ def test_compare_to_transformers(self, model_arch): self.assertIsInstance(ov_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type]) # Compare tensor outputs self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-4)) + del transformers_model + del ov_model gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) @@ -367,6 +388,8 @@ def test_pipeline(self, model_arch): outputs = pipe("My Name is Arthur and I live in Lyon.") self.assertEqual(pipe.device, model.device) self.assertTrue(all(item["score"] > 0.0 for item in outputs)) + del model + del pipe gc.collect() @@ -400,6 +423,8 @@ def test_compare_to_transformers(self, model_arch): torch.Tensor(ov_outputs.last_hidden_state), transformers_outputs.last_hidden_state, atol=1e-4 ) ) + del transformers_model + del ov_model gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) @@ -411,6 +436,8 @@ def test_pipeline(self, model_arch): outputs = pipe("My Name is Arthur and I live in Lyon.") self.assertEqual(pipe.device, model.device) self.assertTrue(all(all(isinstance(item, float) for item in row) for row in outputs[0])) + del pipe + del model gc.collect() @@ -451,6 +478,8 @@ def test_compare_to_transformers(self, model_arch): transformers_outputs = transformers_model(**tokens) # Compare tensor outputs self.assertTrue(torch.allclose(ov_outputs.logits, transformers_outputs.logits, atol=1e-4)) + del transformers_model + del ov_model gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) @@ -466,6 +495,8 @@ def test_pipeline(self, model_arch): outputs = pipe("This is a sample", max_length=10) self.assertEqual(pipe.device, model.device) self.assertTrue(all("This is a sample" in item["generated_text"] for item in outputs)) + del pipe + del model gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) @@ -481,6 +512,8 @@ def test_multiple_inputs(self, model_arch): outputs = model.generate(**tokens, generation_config=generation_config) self.assertIsInstance(outputs, torch.Tensor) self.assertEqual(outputs.shape[0], 3) + del model + gc.collect() def test_model_and_decoder_same_device(self): model_id = MODEL_NAMES["gpt2"] @@ -489,6 +522,8 @@ def test_model_and_decoder_same_device(self): self.assertEqual(model._device, "TEST") # Verify that request is being reset self.assertEqual(model.request, None) + del model + gc.collect() def test_compare_with_and_without_past_key_values(self): model_id = MODEL_NAMES["gpt2"] @@ -518,6 +553,9 @@ def test_compare_with_and_without_past_key_values(self): f"With pkv latency: {with_pkv_timer.elapsed:.3f} ms, without pkv 
latency: {without_pkv_timer.elapsed:.3f} ms," f" speedup: {without_pkv_timer.elapsed / with_pkv_timer.elapsed:.3f}", ) + del model_with_pkv + del model_without_pkv + gc.collect() class OVModelForMaskedLMIntegrationTest(unittest.TestCase): @@ -527,7 +565,7 @@ class OVModelForMaskedLMIntegrationTest(unittest.TestCase): # "camembert", # "convbert", # "data2vec_text", - "deberta", + # "deberta", # "deberta_v2", "distilbert", "electra", @@ -538,7 +576,7 @@ class OVModelForMaskedLMIntegrationTest(unittest.TestCase): "roformer", "squeezebert", "xlm", - # "xlm_roberta", + "xlm_roberta", ) @parameterized.expand(SUPPORTED_ARCHITECTURES) @@ -560,6 +598,8 @@ def test_compare_to_transformers(self, model_arch): self.assertIsInstance(ov_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type]) # Compare tensor outputs self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-4)) + del transformers_model + del ov_model gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) @@ -571,6 +611,8 @@ def test_pipeline(self, model_arch): outputs = pipe(f"This is a {tokenizer.mask_token}.") self.assertEqual(pipe.device, model.device) self.assertTrue(all(item["score"] > 0.0 for item in outputs)) + del pipe + del model gc.collect() @@ -613,6 +655,8 @@ def test_compare_to_transformers(self, model_arch): self.assertIsInstance(ov_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type]) # Compare tensor outputs self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-4)) + del transformers_model + del ov_model gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) @@ -625,6 +669,8 @@ def test_pipeline(self, model_arch): self.assertEqual(pipe.device, model.device) self.assertGreaterEqual(outputs[0]["score"], 0.0) self.assertTrue(isinstance(outputs[0]["label"], str)) + del model + del pipe gc.collect() @parameterized.expand(TIMM_MODELS) @@ -706,6 +752,8 @@ def test_compare_to_transformers(self, model_arch): transformers_outputs = transformers_model(**tokens, **decoder_inputs) # Compare tensor outputs self.assertTrue(torch.allclose(ov_outputs.logits, transformers_outputs.logits, atol=1e-4)) + del transformers_model + del ov_model gc.collect() @@ -738,7 +786,8 @@ def test_pipeline(self, model_arch): outputs = pipe(text) self.assertEqual(pipe.device, model.device) self.assertIsInstance(outputs[0]["translation_text"], str) - + del pipe + del model gc.collect() @parameterized.expand(SUPPORTED_ARCHITECTURES) @@ -758,6 +807,7 @@ def test_generate_utils(self, model_arch): outputs = model.generate(input_ids=tokens["input_ids"]) outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True) self.assertIsInstance(outputs[0], str) + del model gc.collect() @@ -789,6 +839,9 @@ def test_compare_with_and_without_past_key_values(self): f"With pkv latency: {with_pkv_timer.elapsed:.3f} ms, without pkv latency: {without_pkv_timer.elapsed:.3f} ms," f" speedup: {without_pkv_timer.elapsed / with_pkv_timer.elapsed:.3f}", ) + del model_with_pkv + del model_without_pkv + gc.collect() class OVModelForAudioClassificationIntegrationTest(unittest.TestCase): @@ -834,6 +887,10 @@ def test_compare_to_transformers(self, model_arch): # Compare tensor outputs self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-3)) + del transformers_model + del ov_model + gc.collect() + @parameterized.expand(SUPPORTED_ARCHITECTURES) def test_pipeline(self, model_arch): model_id = MODEL_NAMES[model_arch] @@ -843,6 +900,9 @@ def 
test_pipeline(self, model_arch): outputs = pipe([np.random.random(16000)]) self.assertEqual(pipe.device, model.device) self.assertTrue(all(item["score"] > 0.0 for item in outputs[0])) + del pipe + del model + gc.collect() class OVModelForCTCIntegrationTest(unittest.TestCase): @@ -896,6 +956,8 @@ def test_compare_to_transformers(self, model_arch): # compare tensor outputs self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-4)) + del transformers_model + del ov_model gc.collect() @@ -948,6 +1010,8 @@ def test_compare_to_transformers(self, model_arch): torch.allclose(torch.Tensor(ov_outputs.embeddings), transformers_outputs.embeddings, atol=1e-4) ) + del transformers_model + del ov_model gc.collect() @@ -997,4 +1061,6 @@ def test_compare_to_transformers(self, model_arch): # compare tensor outputs self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-4)) + del transformers_model + del ov_model gc.collect() From d29a0c1385eb1780b565c3b8053dcb3823c25af2 Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 23 Aug 2023 08:45:00 +0400 Subject: [PATCH 28/38] better workaround for nncf patch torch ops and apply review comments --- .github/workflows/test_openvino.yml | 6 +----- optimum/exporters/openvino/convert.py | 21 ++++++++++++++------- optimum/exporters/openvino/utils.py | 6 ++++++ 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml index 80ab12c2f0..cb58f412a6 100644 --- a/.github/workflows/test_openvino.yml +++ b/.github/workflows/test_openvino.yml @@ -35,8 +35,4 @@ jobs: pip install .[openvino,nncf,tests,diffusers] - name: Test with Pytest run: | - pytest tests/openvino/test_modeling.py - pytest tests/openvino/test_quantization.py - pytest tests/openvino/test_stable_diffusion.py - pytest tests/openvino/test_training_examples.py - pytest tests/openvino/test_training.py + pytest tests/openvino/ --ignore test_modeling_basic diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index eb8f20f6c9..1a0d77b357 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -70,14 +70,16 @@ def export( config ([`~exporters.onnx.config.OnnxConfig`]): The ONNX configuration associated with the exported model. output (`Path`): - Directory to store the exported ONNX model. + Directory to store the exported model. opset (`Optional[int]`, defaults to `None`): The version of the ONNX operator set to use. device (`str`, *optional*, defaults to `cpu`): - The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for + The device on which the model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for export on CUDA devices. input_shapes (`Optional[Dict]`, defaults to `None`): - If specified, allows to use specific shapes for the example input provided to the ONNX exporter. + If specified, allows to use specific shapes for the example input provided to the exporter. + from_onnx (`bool`, defaults to False): + If set to True, model will be converted vie exporting to ONNX. Returns: `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from @@ -166,16 +168,18 @@ def export_pytorch( model ([`PreTrainedModel`]): The model to export. config ([`~exporters.onnx.config.OnnxConfig`]): - The ONNX configuration associated with the exported model. 
+ The configuration associated with the exported model. opset (`int`): The version of the ONNX operator set to use. output (`Path`): - Directory to store the exported ONNX model. + Directory to store the exported model. device (`str`, defaults to `"cpu"`): - The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for + The device on which the model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for export on CUDA devices. input_shapes (`optional[Dict]`, defaults to `None`): - If specified, allows to use specific shapes for the example input provided to the ONNX exporter. + If specified, allows to use specific shapes for the example input provided to the exporter. + from_onnx (`bool`, defaults to False): + If set to True, model will be converted vie exporting to ONNX. Returns: `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from @@ -225,6 +229,9 @@ def export_pytorch( input_info = get_input_shapes(dummy_inputs, inputs) custom_patcher = type(config).patch_model_for_export != OnnxConfig.patch_model_for_export try: + # TorchScript used behaind OpenVINO conversion. Optimum supports only return_dict=True models for patching, + # while TorchScript do not support dictionary with values of mixed types (e.g. Tensor and None) in model input/output + # To handle it, additional wrapper on patcher forward applied. if custom_patcher or dict_inputs: patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs) patched_forward = patcher.patched_forward diff --git a/optimum/exporters/openvino/utils.py b/optimum/exporters/openvino/utils.py index 9b1867ba83..eafb8da62f 100644 --- a/optimum/exporters/openvino/utils.py +++ b/optimum/exporters/openvino/utils.py @@ -16,6 +16,8 @@ from openvino.runtime import PartialShape +from ...intel.utils.import_utils import is_nncf_available + if is_torch_available(): import torch @@ -79,3 +81,7 @@ def clear_class_registry(): torch._C._jit_clear_class_registry() torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() torch.jit._state._clear_class_state() + if is_nncf_available(): + from nncf.torch import patch_torch_operators + + patch_torch_operators() From 6ccb6d77be5299f4ccd5448f8edad39ac544c610 Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 23 Aug 2023 08:55:07 +0400 Subject: [PATCH 29/38] remove flag from_onnx --- optimum/exporters/openvino/__init__.py | 2 +- optimum/exporters/openvino/convert.py | 10 +--------- optimum/intel/openvino/quantization.py | 11 +++-------- optimum/intel/openvino/trainer.py | 2 +- 4 files changed, 6 insertions(+), 19 deletions(-) diff --git a/optimum/exporters/openvino/__init__.py b/optimum/exporters/openvino/__init__.py index 9dc8b1833d..d87d8dda9e 100644 --- a/optimum/exporters/openvino/__init__.py +++ b/optimum/exporters/openvino/__init__.py @@ -1,5 +1,5 @@ from .__main__ import main_export -from .convert import export, export_models +from .convert import export, export_models, export_pytorch_via_onnx __all__ = ["main_export", "export", "export_models"] diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index 1a0d77b357..54482d3dc1 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -59,7 +59,6 @@ def export( device: str = "cpu", input_shapes: Optional[Dict] = None, model_kwargs: Optional[Dict[str, Any]] = None, - from_onnx: bool = False, ) -> Tuple[List[str], List[str]]: """ Exports a Pytorch 
or TensorFlow model to an OpenVINO Intermediate Representation. @@ -78,8 +77,6 @@ def export( export on CUDA devices. input_shapes (`Optional[Dict]`, defaults to `None`): If specified, allows to use specific shapes for the example input provided to the exporter. - from_onnx (`bool`, defaults to False): - If set to True, model will be converted vie exporting to ONNX. Returns: `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from @@ -103,7 +100,6 @@ def export( device=device, input_shapes=input_shapes, model_kwargs=model_kwargs, - from_onnx=from_onnx, ) elif is_tf_available() and issubclass(type(model), TFPreTrainedModel): @@ -133,6 +129,7 @@ def export_pytorch_via_onnx( ): import torch + output = Path(output) orig_torch_onnx_export = torch.onnx.export torch.onnx.export = functools.partial(orig_torch_onnx_export, do_constant_folding=False) model.config.torchscript = False @@ -159,7 +156,6 @@ def export_pytorch( device: str = "cpu", input_shapes: Optional[Dict] = None, model_kwargs: Optional[Dict[str, Any]] = None, - from_onnx: bool = False, ) -> Tuple[List[str], List[str]]: """ Exports a PyTorch model to an OpenVINO Intermediate Representation. @@ -178,8 +174,6 @@ def export_pytorch( export on CUDA devices. input_shapes (`optional[Dict]`, defaults to `None`): If specified, allows to use specific shapes for the example input provided to the exporter. - from_onnx (`bool`, defaults to False): - If set to True, model will be converted vie exporting to ONNX. Returns: `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from @@ -190,8 +184,6 @@ def export_pytorch( logger.info(f"Using framework PyTorch: {torch.__version__}") output = Path(output) - if from_onnx: - return export_pytorch_via_onnx(model, config, opset, output, device, input_shapes, model_kwargs) with torch.no_grad(): model.config.torchscript = False diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index 708be320d4..c758675c97 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -37,7 +37,7 @@ from optimum.exporters.tasks import TasksManager from optimum.quantization_base import OptimumQuantizer -from ...exporters.openvino import export +from ...exporters.openvino import export, export_pytorch_via_onnx from ..utils.constant import _TASK_ALIASES from ..utils.modeling_utils import patch_decoder_attention_mask from .configuration import OVConfig @@ -389,15 +389,10 @@ def _quantize_torchmodel( model_path = save_directory / (onnx_file_name if quantization_config.save_onnx_model else ov_file_name) onnx_path = save_directory / onnx_file_name + export_fn = export if not quantization_config.save_onnx_model else export_pytorch_via_onnx opset = min(onnx_config.DEFAULT_ONNX_OPSET, MAX_ONNX_OPSET) opset = max(opset, MIN_ONNX_QDQ_OPSET) - _, _, is_onnx = export( - model=model, - config=onnx_config, - output=model_path, - opset=opset, - from_onnx=quantization_config.save_onnx_model, - ) + _, _, is_onnx = export_fn(model=model, config=onnx_config, output=model_path, opset=opset) if is_onnx: # Load and save the compressed model model = core.read_model(onnx_path) diff --git a/optimum/intel/openvino/trainer.py b/optimum/intel/openvino/trainer.py index e09293739f..2935c20dbf 100644 --- a/optimum/intel/openvino/trainer.py +++ b/optimum/intel/openvino/trainer.py @@ -752,7 +752,7 @@ def _save(self, output_dir: Optional[str] = None, state_dict=None): try: # OpenVINO 
IR pruning requires static-shaped input ov_model = self._reshape_ir(ov_model, static_shape=True) - apply_moc_transformations(ov_model) + apply_moc_transformations(ov_model, cf=False) if self._get_compression_controller_by_cls(QuantizationController) is not None: compress_quantize_weights_transformation(ov_model) apply_pruning_transformation(ov_model) From 8775ab2ce8fc469023cdf9f4d692ab605cd885d5 Mon Sep 17 00:00:00 2001 From: Aidova Date: Wed, 30 Aug 2023 17:44:03 +0400 Subject: [PATCH 30/38] refactoring --- optimum/exporters/openvino/convert.py | 23 ++++++++++++++++------ optimum/exporters/openvino/utils.py | 4 ---- optimum/intel/openvino/modeling_base.py | 8 ++++---- optimum/intel/openvino/modeling_decoder.py | 11 +++++------ optimum/intel/openvino/quantization.py | 1 + 5 files changed, 27 insertions(+), 20 deletions(-) diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index 54482d3dc1..8ec6576796 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -25,7 +25,7 @@ from openvino.runtime.utils.types import get_element_type from openvino.tools.ovc import convert_model from optimum.exporters.onnx.base import OnnxConfig -from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed, export_tensorflow +from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed, export_tensorflow as export_tensorflow_onnx from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx from optimum.utils import is_diffusers_available @@ -118,6 +118,18 @@ def export( ) +def export_tensorflow(model: Union["PreTrainedModel", "ModelMixin"], config: OnnxConfig, opset: int, output: Path): + onnx_path = Path(output).with_suffix(".onnx") + input_names, output_names = export_tensorflow_onnx(model, config, opset, onnx_path) + ov_model = convert_model(str(onnx_path)) + save_model( + ov_model, + output.parent / output, + compress_to_fp16=False, + ) + return input_names, output_names, True + + def export_pytorch_via_onnx( model: Union["PreTrainedModel", "ModelMixin"], config: OnnxConfig, @@ -221,9 +233,10 @@ def export_pytorch( input_info = get_input_shapes(dummy_inputs, inputs) custom_patcher = type(config).patch_model_for_export != OnnxConfig.patch_model_for_export try: - # TorchScript used behaind OpenVINO conversion. Optimum supports only return_dict=True models for patching, + # TorchScript used behind OpenVINO conversion. Optimum supports only return_dict=True models for patching, # while TorchScript do not support dictionary with values of mixed types (e.g. Tensor and None) in model input/output # To handle it, additional wrapper on patcher forward applied. 
+ # model.config.torchscript = True cannot be used for patching, because it overrides return_dict to False if custom_patcher or dict_inputs: patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs) patched_forward = patcher.patched_forward @@ -248,7 +261,6 @@ def ts_patched_forward(*args, **kwargs): except Exception as ex: logger.warning(f"Export model to OpenVINO directly failed with: \n{ex}.\nModel will be exported to ONNX") return export_pytorch_via_onnx(model, config, opset, output, device, input_shapes, model_kwargs) - clear_class_registry() ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} ordered_input_names = list(inputs) flatten_inputs = flattenize_inputs(ordered_dummy_inputs.values()) @@ -269,9 +281,8 @@ def ts_patched_forward(*args, **kwargs): inp_tensor.get_node().set_partial_shape(static_shape) inp_tensor.get_node().set_element_type(get_element_type(inp_data.cpu().numpy().dtype)) ov_model.validate_nodes_and_infer_types() - save_model( - ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output, compress_to_fp16=False - ) + save_model(ov_model, output, compress_to_fp16=False) + clear_class_registry() del model gc.collect() return input_names, output_names, False diff --git a/optimum/exporters/openvino/utils.py b/optimum/exporters/openvino/utils.py index eafb8da62f..af9951a8f4 100644 --- a/optimum/exporters/openvino/utils.py +++ b/optimum/exporters/openvino/utils.py @@ -81,7 +81,3 @@ def clear_class_registry(): torch._C._jit_clear_class_registry() torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() torch.jit._state._clear_class_state() - if is_nncf_available(): - from nncf.torch import patch_torch_operators - - patch_torch_operators() diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index b06670dffa..0fba2e8d3e 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -27,10 +27,10 @@ from optimum.exporters.onnx import OnnxConfig from optimum.exporters.tasks import TasksManager +from optimum.exporters.onnx.base import OnnxConfig from optimum.modeling_base import OptimizedModel from ...exporters.openvino import export -from ...exporters.openvino.utils import is_torch_model from ..utils.import_utils import is_transformers_version from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME @@ -305,18 +305,18 @@ def _to_onnx_to_load( save_dir = TemporaryDirectory() save_dir_path = Path(save_dir.name) - # Export the model to the ONNX format + # Export the model to the OpenVINO IR format export( model=model, config=onnx_config, opset=onnx_config.DEFAULT_ONNX_OPSET, - output=save_dir_path / ONNX_WEIGHTS_NAME, + output=save_dir_path / OV_XML_FILE_NAME, ) return cls._from_pretrained( model_id=save_dir_path, config=config, - from_onnx=not is_torch_model(model), + from_onnx=False, use_auth_token=use_auth_token, revision=revision, force_download=force_download, diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index 966bec52f4..a9cd8e309b 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -31,11 +31,10 @@ from optimum.utils import NormalizedConfigManager from ...exporters.openvino import export -from ...exporters.openvino.utils import is_torch_model from ..utils.import_utils import is_transformers_version from ..utils.modeling_utils import patch_decoder_attention_mask from
.modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel -from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, STR_TO_OV_TYPE +from .utils import OV_XML_FILE_NAME, STR_TO_OV_TYPE if is_transformers_version("<", "4.25.0"): @@ -235,18 +234,18 @@ def _from_transformers( # TODO : create ModelPatcher to patch each architecture model = patch_decoder_attention_mask(model) - # Export the model to the ONNX format - export(model=model, config=onnx_config, output=save_dir_path / ONNX_WEIGHTS_NAME) + # Export the model to the OpenVINO IR format + export(model=model, config=onnx_config, output=save_dir_path / OV_XML_FILE_NAME) return cls._from_pretrained( model_id=save_dir_path, config=config, - from_onnx=not is_torch_model(model), + from_onnx=False, use_auth_token=use_auth_token, revision=revision, force_download=force_download, cache_dir=cache_dir, - file_name=ONNX_WEIGHTS_NAME if not is_torch_model(model) else OV_XML_FILE_NAME, + file_name=OV_XML_FILE_NAME, local_files_only=local_files_only, use_cache=use_cache, **kwargs, diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index c758675c97..a56f5222ba 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -396,6 +396,7 @@ def _quantize_torchmodel( if is_onnx: # Load and save the compressed model model = core.read_model(onnx_path) + # The model requires a second save to apply the weight compression transformations self._save_pretrained(model, output_path) # if onnx conversion happens as fallback for pytorch conversion, remove onnx model if not quantization_config.save_onnx_model: From 0cd1c028338809a0b9f822f8e64f544fd44da497 Mon Sep 17 00:00:00 2001 From: Aidova Date: Wed, 30 Aug 2023 20:29:05 +0400 Subject: [PATCH 31/38] docstrings and typehints --- optimum/exporters/openvino/convert.py | 69 ++++++++++++++++++++++-- optimum/exporters/openvino/utils.py | 70 ++++++++++++++++++++++--- optimum/intel/openvino/modeling_base.py | 8 +-- optimum/intel/utils/modeling_utils.py | 12 ++++- setup.py | 2 +- 5 files changed, 145 insertions(+), 16 deletions(-) diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index 8ec6576796..5bcbf95088 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -25,8 +25,9 @@ from openvino.runtime.utils.types import get_element_type from openvino.tools.ovc import convert_model from optimum.exporters.onnx.base import OnnxConfig -from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed, export_tensorflow as export_tensorflow_onnx +from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx +from optimum.exporters.onnx.convert import export_tensorflow as export_tensorflow_onnx from optimum.utils import is_diffusers_available @@ -119,6 +120,20 @@ def export( def export_tensorflow(model: Union["PreTrainedModel", "ModelMixin"], config: OnnxConfig, opset: int, output: Path): + """ + Export the TensorFlow model to OpenVINO format. + + Args: + model (Union["PreTrainedModel", "ModelMixin"]): The model to export. + config (OnnxConfig): The configuration of the model. + opset (int): The ONNX opset version to use. + output (Path): The path to save the model.
+ + Returns: + input_names: list of input names from ONNX configuration + output_names: list of output names from ONNX configuration + bool: True if the model was exported successfully. + """ onnx_path = Path(output).with_suffix(".onnx") input_names, output_names = export_tensorflow_onnx(model, config, opset, onnx_path) ov_model = convert_model(str(onnx_path)) @@ -139,6 +154,30 @@ def export_pytorch_via_onnx( input_shapes: Optional[Dict] = None, model_kwargs: Optional[Dict[str, Any]] = None, ): + """ + Exports a PyTorch model to an OpenVINO Intermediate Representation via ONNX export. + + Args: + model ([`PreTrainedModel`]): + The model to export. + config ([`~exporters.onnx.config.OnnxConfig`]): + The configuration associated with the exported model. + opset (`int`): + The version of the ONNX operator set to use. + output (`Path`): + Directory to store the exported model. + device (`str`, defaults to `"cpu"`): + The device on which the model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for + export on CUDA devices. + input_shapes (`optional[Dict]`, defaults to `None`): + If specified, allows to use specific shapes for the example input provided to the exporter. + model_kwargs (optional[Dict[str, Any]], defaults to `None`): + Additional kwargs for model export + + Returns: + `Tuple[List[str], List[str], bool]`: A tuple with an ordered list of the model's inputs, the named inputs from + the ONNX configuration, and a boolean flag indicating whether the legacy ONNX export path was applied to the model. + """ import torch output = Path(output) @@ -186,10 +225,12 @@ def export_pytorch( export on CUDA devices. input_shapes (`optional[Dict]`, defaults to `None`): If specified, allows to use specific shapes for the example input provided to the exporter. + model_kwargs (optional[Dict[str, Any]], defaults to `None`): + Additional kwargs for model export Returns: - `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from - the ONNX configuration. + `Tuple[List[str], List[str], bool]`: A tuple with an ordered list of the model's inputs, the named inputs from + the ONNX configuration, and a boolean flag indicating whether the legacy ONNX export path was applied to the model. """ import torch from torch.utils._pytree import tree_map @@ -299,6 +340,28 @@ def export_models( input_shapes: Optional[Dict] = None, model_kwargs: Optional[Dict[str, Any]] = None, ) -> Tuple[List[List[str]], List[List[str]]]: + """ + Export the models to OpenVINO IR format. + + Args: + models_and_onnx_configs (Dict[str, Tuple[Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], "OnnxConfig"]]): + output_dir (Path): output directory for saving models + opset (Optional[int], optional, Defaults to None): ONNX export opset + output_names (Optional[List[str]], optional, Defaults to None): model output names + device (str, optional, Defaults to "cpu"): + The device on which the model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for + export on CUDA devices. + input_shapes (Optional[Dict], optional, Defaults to None): + If specified, allows to use specific shapes for the example input provided to the exporter.
+ model_kwargs (Optional[Dict[str, Any]], optional): + Additional kwargs for model export + + Raises: + ValueError: if custom names set not equal of number of models + + Returns: + list of input_names and output_names from ONNX configuration + """ outputs = [] if output_names is not None and len(output_names) != len(models_and_onnx_configs): diff --git a/optimum/exporters/openvino/utils.py b/optimum/exporters/openvino/utils.py index af9951a8f4..ebd7ec646e 100644 --- a/optimum/exporters/openvino/utils.py +++ b/optimum/exporters/openvino/utils.py @@ -12,25 +12,48 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Any, Dict, List, Tuple, Union + from transformers.utils import is_torch_available from openvino.runtime import PartialShape - -from ...intel.utils.import_utils import is_nncf_available +from optimum.utils import is_diffusers_available if is_torch_available(): import torch import torch.nn as nn + from transformers.modeling_utils import PreTrainedModel + +if is_diffusers_available(): + from diffusers import ModelMixin + +def is_torch_model(model: Union["PreTrainedModel", "ModelMixin"]): + """ + Checks whether the model is a torch model. -def is_torch_model(model): + Args: + model (Union[PretrainedModel, ModelMixin]): The model to check. + + Returns: + bool: True if the model is a torch model. + """ if not is_torch_available(): return False return isinstance(model, nn.Module) -def flattenize_inputs(inputs): +def flattenize_inputs(inputs: List[Any]): + """ + Flatten the inputs into a list. + + Args: + inputs (List[Any]): The inputs to flatten. + + Returns: + List[Any]: The flattened inputs. + """ flatten_inputs = [] for input_data in inputs: if input_data is None: @@ -42,8 +65,27 @@ def flattenize_inputs(inputs): return flatten_inputs -def remove_none_from_dummy_inputs(dummy_inputs): - def remove_none_from_list_tuple(item): +def remove_none_from_dummy_inputs(dummy_inputs: Dict[str, Any]): + """ + Removes None values from the dictionary. + + Args: + dummy_inputs (Dict[str, Any]): Dictionary with None values. + Returns: + upd_dummy (Dict[str, Any]): updated dictionary with removed None values + dict_dummy (List[Tuple[str, List[str]]]): list of inputs represented as dictionary provided as pair name and list of nested keys + """ + + def remove_none_from_list_tuple(item: Union[List[Any], Tuple[Any]]): + """ + Removes None values from a list or tuple. + + Args: + item (list or tuple): The list or tuple to remove None values from. + + Returns: + list or tuple: The list or tuple with None values removed. + """ new_item = [i for i in item if i is not None] return type(item)(new_item) @@ -63,7 +105,18 @@ def remove_none_from_list_tuple(item): return upd_dummy, dict_dummy -def get_input_shapes(dummy_inputs, inputs): +def get_input_shapes(dummy_inputs: Dict[str, Any], inputs: Dict[str, Any]): + """ + Resolves input shapes based on dynamic axes from input config and dummy input shapes + + Args: + dummy_inputs (Dict[str, Any]): A dictionary of dummy inputs. + inputs (Dict[str, Any]): A dictionary of input tensors. 
+ + Returns: + input_info: List of input info for conversion + + """ input_info = [] for input_name, data in dummy_inputs.items(): if isinstance(data, (tuple, list, dict)): @@ -78,6 +131,9 @@ def get_input_shapes(dummy_inputs, inputs): def clear_class_registry(): + """ + Removes Torchscript cached modules + """ torch._C._jit_clear_class_registry() torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() torch.jit._state._clear_class_state() diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index 0fba2e8d3e..b06670dffa 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -27,10 +27,10 @@ from optimum.exporters.onnx import OnnxConfig from optimum.exporters.tasks import TasksManager -from optimum.exporters.onnx.base import OnnxConfig from optimum.modeling_base import OptimizedModel from ...exporters.openvino import export +from ...exporters.openvino.utils import is_torch_model from ..utils.import_utils import is_transformers_version from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME @@ -305,18 +305,18 @@ def _to_onnx_to_load( save_dir = TemporaryDirectory() save_dir_path = Path(save_dir.name) - # Export the model to the OpenVINO IR format + # Export the model to the ONNX format export( model=model, config=onnx_config, opset=onnx_config.DEFAULT_ONNX_OPSET, - output=save_dir_path / OV_XML_FILE_NAME, + output=save_dir_path / ONNX_WEIGHTS_NAME, ) return cls._from_pretrained( model_id=save_dir_path, config=config, - from_onnx=False, + from_onnx=not is_torch_model(model), use_auth_token=use_auth_token, revision=revision, force_download=force_download, diff --git a/optimum/intel/utils/modeling_utils.py b/optimum/intel/utils/modeling_utils.py index 924c65d10a..f11aadd806 100644 --- a/optimum/intel/utils/modeling_utils.py +++ b/optimum/intel/utils/modeling_utils.py @@ -15,6 +15,7 @@ from typing import Tuple import torch +from transformers.modeling_utils import PreTrainedModel # Modified from transformers.models.bloom.modeling_bloom._make_causal_mask @@ -91,7 +92,16 @@ def _prepare_decoder_attention_mask(attention_mask, input_shape, inputs_embeds, return combined_attention_mask -def patch_decoder_attention_mask(model): +def patch_decoder_attention_mask(model: "PreTrainedModel"): + """ + Apply patch on decoder with past model forward to resolve first inference based on model architecture + + Args: + model (PretrainedModel): The model to patch. 
+ + Returns: + model with applied patch + """ if model.config.model_type == "bloom": model.transformer._prepare_attn_mask = _prepare_attn_mask elif model.config.model_type == "llama": diff --git a/setup.py b/setup.py index 4812ede91a..cee7315781 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ "onnxruntime<1.15.0", ], "openvino": ["openvino==2023.1.0.dev20230811", "onnx", "onnxruntime"], - "nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.git"], + "nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.git", "transformers<4.32.0"], "ipex": ["transformers<4.32.0", "intel-extension-for-pytorch", "onnx"], "diffusers": ["diffusers", "invisible-watermark>=0.2.0"], "quality": QUALITY_REQUIRE, From 1985e21165c420d67673c930042918bbe95f9bbc Mon Sep 17 00:00:00 2001 From: eaidova Date: Mon, 4 Sep 2023 20:34:58 +0400 Subject: [PATCH 32/38] small fixes --- optimum/intel/openvino/modeling_base.py | 8 ++++---- optimum/intel/openvino/quantization.py | 3 +++ optimum/intel/openvino/trainer.py | 4 ++-- setup.py | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index b06670dffa..5ed250ff8d 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -278,7 +278,7 @@ def _from_transformers( onnx_config = onnx_config_class(model.config) - return cls._to_onnx_to_load( + return cls._to_load( model=model, config=config, onnx_config=onnx_config, @@ -290,7 +290,7 @@ def _from_transformers( ) @classmethod - def _to_onnx_to_load( + def _to_load( cls, model: PreTrainedModel, config: PretrainedConfig, @@ -310,13 +310,13 @@ def _to_onnx_to_load( model=model, config=onnx_config, opset=onnx_config.DEFAULT_ONNX_OPSET, - output=save_dir_path / ONNX_WEIGHTS_NAME, + output=save_dir_path / OV_XML_FILE_NAME, ) return cls._from_pretrained( model_id=save_dir_path, config=config, - from_onnx=not is_torch_model(model), + from_onnx=False, use_auth_token=use_auth_token, revision=revision, force_download=force_download, diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index a56f5222ba..3349ce142f 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -359,6 +359,9 @@ def _quantize_torchmodel( else Path(ov_file_name).with_suffix(".onnx") ) if weights_only: + if getattr(self.model.config, "tie_word_embeddings", True): + # to fix problem with shared embedding weights in nncf compress_weights() + self.model.tie_weights() compressed_model = compress_weights(self.model) self.model = compressed_model else: diff --git a/optimum/intel/openvino/trainer.py b/optimum/intel/openvino/trainer.py index 2935c20dbf..0bba054ad3 100644 --- a/optimum/intel/openvino/trainer.py +++ b/optimum/intel/openvino/trainer.py @@ -134,7 +134,7 @@ def remap(value): with torch.no_grad(): model.eval() # Disable node additions to be exported in the graph - model.disable_dynamic_graph_building() + model.nncf.disable_dynamic_graph_building() onnx_export( model, model_inputs, @@ -145,7 +145,7 @@ def remap(value): do_constant_folding=True, opset_version=opset, ) - model.enable_dynamic_graph_building() + model.nncf.enable_dynamic_graph_building() class OVTrainer(Trainer): diff --git a/setup.py b/setup.py index cee7315781..9d596442dd 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ "onnxruntime<1.15.0", ], "openvino": ["openvino==2023.1.0.dev20230811", "onnx", "onnxruntime"], - 
"nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.git", "transformers<4.32.0"], + "nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.gitt@release_v260"], "ipex": ["transformers<4.32.0", "intel-extension-for-pytorch", "onnx"], "diffusers": ["diffusers", "invisible-watermark>=0.2.0"], "quality": QUALITY_REQUIRE, From 6857029fcb042191472f23d62253fc0e8b754a44 Mon Sep 17 00:00:00 2001 From: eaidova Date: Mon, 4 Sep 2023 20:40:07 +0400 Subject: [PATCH 33/38] add docstring to main_export --- optimum/exporters/openvino/__main__.py | 64 +++++++++++++++++++++++++ optimum/intel/openvino/modeling_base.py | 1 - setup.py | 2 +- 3 files changed, 65 insertions(+), 2 deletions(-) diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py index 2c3428aa0c..d6dae040de 100644 --- a/optimum/exporters/openvino/__main__.py +++ b/optimum/exporters/openvino/__main__.py @@ -57,6 +57,70 @@ def main_export( fn_get_submodels: Optional[Callable] = None, **kwargs_shapes, ): + """ + Full-suite OpenVINO export. + + Args: + > Required parameters + + model_name_or_path (`str`): + Model ID on huggingface.co or path on disk to the model repository to export. + output (`Union[str, Path]`): + Path indicating the directory where to store the generated ONNX model. + + > Optional parameters + + task (`Optional[str]`, defaults to `None`): + The task to export the model for. If not specified, the task will be auto-inferred based on the model. For decoder models, + use `xxx-with-past` to export the model using past key values in the decoder. + device (`str`, defaults to `"cpu"`): + The device to use to do the export. Defaults to "cpu". + fp16 (`Optional[bool]`, defaults to `"False"`): + Use half precision during the export. PyTorch-only, requires `device="cuda"`. + framework (`Optional[str]`, defaults to `None`): + The framework to use for the ONNX export (`"pt"` or `"tf"`). If not provided, will attempt to automatically detect + the framework for the checkpoint. + cache_dir (`Optional[str]`, defaults to `None`): + Path indicating where to store cache. The default Hugging Face cache path will be used by default. + trust_remote_code (`bool`, defaults to `False`): + Allows to use custom code for the modeling hosted in the model repository. This option should only be set for repositories + you trust and in which you have read the code, as it will execute on your local machine arbitrary code present in the + model repository. + pad_token_id (`Optional[int]`, defaults to `None`): + This is needed by some models, for some tasks. If not provided, will attempt to use the tokenizer to guess it. + subfolder (`str`, defaults to `""`): + In case the relevant files are located inside a subfolder of the model repo either locally or on huggingface.co, you can + specify the folder name here. + revision (`str`, defaults to `"main"`): + Revision is the specific model version to use. It can be a branch name, a tag name, or a commit id. + force_download (`bool`, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + local_files_only (`Optional[bool]`, defaults to `False`): + Whether or not to only look at local files (i.e., do not try to download the model). + use_auth_token (`Optional[str]`, defaults to `None`): + The token to use as HTTP bearer authorization for remote files. 
If `True`, will use the token generated + when running `transformers-cli login` (stored in `~/.huggingface`). + model_kwargs (`Optional[Dict[str, Any]]`, defaults to `None`): + Experimental usage: keyword arguments to pass to the model during + the export. This argument should be used along the `custom_onnx_configs` argument + in case, for example, the model inputs/outputs are changed (for example, if + `model_kwargs={"output_attentions": True}` is passed). + custom_onnx_configs (`Optional[Dict[str, OnnxConfig]]`, defaults to `None`): + Experimental usage: override the default ONNX config used for the given model. This argument may be useful for advanced users that desire a finer-grained control on the export. An example is available [here](https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model). + fn_get_submodels (`Optional[Callable]`, defaults to `None`): + Experimental usage: Override the default submodels that are used at the export. This is + especially useful when exporting a custom architecture that needs to split the ONNX (e.g. encoder-decoder). If unspecified with custom models, optimum will try to use the default submodels used for the given task, with no guarantee of success. + **kwargs_shapes (`Dict`): + Shapes to use during inference. This argument allows to override the default shapes used during the ONNX export. + + Example usage: + ```python + >>> from optimum.exporters.openvino import main_export + + >>> main_export("gpt2", output="gpt2_onnx/") + ``` + """ output = Path(output) if not output.exists(): output.mkdir(parents=True) diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index 5ed250ff8d..8f8aa1526a 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -30,7 +30,6 @@ from optimum.modeling_base import OptimizedModel from ...exporters.openvino import export -from ...exporters.openvino.utils import is_torch_model from ..utils.import_utils import is_transformers_version from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME diff --git a/setup.py b/setup.py index 9d596442dd..e8efff3e54 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ "onnxruntime<1.15.0", ], "openvino": ["openvino==2023.1.0.dev20230811", "onnx", "onnxruntime"], - "nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.gitt@release_v260"], + "nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.git@release_v260"], "ipex": ["transformers<4.32.0", "intel-extension-for-pytorch", "onnx"], "diffusers": ["diffusers", "invisible-watermark>=0.2.0"], "quality": QUALITY_REQUIRE, From a127e4309af8b8fb67fa5f3fd3d2c39cff7431a0 Mon Sep 17 00:00:00 2001 From: Aidova Date: Tue, 5 Sep 2023 14:07:41 +0400 Subject: [PATCH 34/38] fix timm models --- optimum/intel/openvino/modeling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/intel/openvino/modeling.py b/optimum/intel/openvino/modeling.py index 1cea230429..95fb0aca8b 100644 --- a/optimum/intel/openvino/modeling.py +++ b/optimum/intel/openvino/modeling.py @@ -549,7 +549,7 @@ def from_pretrained( model = TimmForImageClassification.from_pretrained(model_id, **kwargs) onnx_config = TimmOnnxConfig(model.config) - return cls._to_onnx_to_load( + return cls._to_load( model=model, config=config, onnx_config=onnx_config, From d96bf756c663fd6ac0c05dc0d1a744389bd095d5 Mon Sep 17 00:00:00 2001 From: eaidova Date: Wed, 13 Sep 2023 11:44:28 +0400 Subject: [PATCH 35/38] fix circular 
imports --- optimum/exporters/openvino/__main__.py | 4 +++- optimum/exporters/openvino/convert.py | 2 +- optimum/exporters/openvino/utils.py | 3 +++ optimum/intel/utils/modeling_utils.py | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py index d6dae040de..5cf0adb176 100644 --- a/optimum/exporters/openvino/__main__.py +++ b/optimum/exporters/openvino/__main__.py @@ -27,10 +27,11 @@ from optimum.utils import DEFAULT_DUMMY_SHAPES from optimum.utils.save_utils import maybe_save_preprocessors -from ...intel.openvino.utils import OV_XML_FILE_NAME from .convert import export_models +OV_XML_FILE_NAME = "openvino_model.xml" + logger = logging.getLogger(__name__) if is_torch_available(): @@ -219,6 +220,7 @@ def main_export( custom_onnx_configs=custom_onnx_configs if custom_onnx_configs is not None else {}, custom_architecture=custom_architecture, fn_get_submodels=fn_get_submodels, + _variant="default", ) if not is_stable_diffusion: diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index 5bcbf95088..ab688f92fa 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -30,8 +30,8 @@ from optimum.exporters.onnx.convert import export_tensorflow as export_tensorflow_onnx from optimum.utils import is_diffusers_available -from ...intel.openvino.utils import OV_XML_FILE_NAME from .utils import ( + OV_XML_FILE_NAME, clear_class_registry, flattenize_inputs, get_input_shapes, diff --git a/optimum/exporters/openvino/utils.py b/optimum/exporters/openvino/utils.py index ebd7ec646e..f0d5366526 100644 --- a/optimum/exporters/openvino/utils.py +++ b/optimum/exporters/openvino/utils.py @@ -29,6 +29,9 @@ from diffusers import ModelMixin +OV_XML_FILE_NAME = "openvino_model.xml" + + def is_torch_model(model: Union["PreTrainedModel", "ModelMixin"]): """ Checks whether the model is a torch model. 
diff --git a/optimum/intel/utils/modeling_utils.py b/optimum/intel/utils/modeling_utils.py index f11aadd806..17abf1059e 100644 --- a/optimum/intel/utils/modeling_utils.py +++ b/optimum/intel/utils/modeling_utils.py @@ -102,7 +102,7 @@ def patch_decoder_attention_mask(model: "PreTrainedModel"): Returns: model with applied patch """ - if model.config.model_type == "bloom": + if model.config.model_type in {"bloom", "mpt"}: model.transformer._prepare_attn_mask = _prepare_attn_mask elif model.config.model_type == "llama": model.model._prepare_decoder_attention_mask = _prepare_decoder_attention_mask From ab8be3ff38e53f036530fe0f19e6a539787f869b Mon Sep 17 00:00:00 2001 From: eaidova Date: Mon, 18 Sep 2023 14:15:49 +0400 Subject: [PATCH 36/38] update ov version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 69117feb22..d0f232e7d8 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ "onnx", "onnxruntime<1.15.0", ], - "openvino": ["openvino==2023.1.0.dev20230811", "onnx", "onnxruntime"], + "openvino": ["openvino>=2023.1.0", "onnx", "onnxruntime"], "nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.git@release_v260"], "ipex": ["transformers<4.32.0", "intel-extension-for-pytorch", "onnx"], "diffusers": ["diffusers"], From 20d99e9c6b95ee4d615db095b7dae5a9adbf39e7 Mon Sep 17 00:00:00 2001 From: eaidova Date: Mon, 18 Sep 2023 14:26:19 +0400 Subject: [PATCH 37/38] revert excluding deberta --- tests/openvino/test_modeling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index f9fba267f8..a4bf9b38e0 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -566,7 +566,7 @@ class OVModelForMaskedLMIntegrationTest(unittest.TestCase): # "camembert", # "convbert", # "data2vec_text", - # "deberta", + "deberta", # "deberta_v2", "distilbert", "electra", From cce69f606e6cec3c46436dfa1c12adf33238e230 Mon Sep 17 00:00:00 2001 From: eaidova Date: Tue, 19 Sep 2023 08:03:58 +0400 Subject: [PATCH 38/38] update nncf on package --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d0f232e7d8..6d81b98b2a 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ "onnxruntime<1.15.0", ], "openvino": ["openvino>=2023.1.0", "onnx", "onnxruntime"], - "nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.git@release_v260"], + "nncf": ["nncf>=2.6.0"], "ipex": ["transformers<4.32.0", "intel-extension-for-pytorch", "onnx"], "diffusers": ["diffusers"], "quality": QUALITY_REQUIRE,
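To tie the series together, here is a minimal usage sketch of the export entry point these commits converge on. It is illustrative only, not part of any patch above: it assumes the `main_export` signature documented in PATCH 33/38, assumes that a single-model export is written to `openvino_model.xml` (the `OV_XML_FILE_NAME` constant from PATCH 35/38), and uses a placeholder output directory together with the tiny test checkpoint referenced in the test suite above.

    # Sketch only: export a Transformers checkpoint directly to OpenVINO IR
    # (no intermediate ONNX file on the default path) and load it back.
    from pathlib import Path

    from openvino.runtime import Core
    from optimum.exporters.openvino import main_export

    output_dir = Path("bert_ov")  # placeholder output location
    # "hf-internal-testing/tiny-random-bert" is reused from the tests above;
    # the task is auto-inferred when not given explicitly.
    main_export("hf-internal-testing/tiny-random-bert", output=output_dir)

    core = Core()
    # File name assumed from the OV_XML_FILE_NAME default ("openvino_model.xml").
    ov_model = core.read_model(str(output_dir / "openvino_model.xml"))
    compiled = core.compile_model(ov_model, "CPU")
    print([inp.get_any_name() for inp in compiled.inputs])

If the direct TorchScript-based conversion fails for a given architecture, the series keeps the ONNX route as a fallback (the `export_pytorch_via_onnx` path guarded by the try/except in convert.py), so the same call is still expected to succeed, only with a temporary ONNX file in between.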