 from nncf.torch.initialization import PTInitializingDataLoader
 from openvino._offline_transformations import compress_quantize_weights_transformation
 from openvino.runtime import Core, Tensor
-from torch.utils.data import DataLoader, RandomSampler, TensorDataset
+from torch.utils.data import DataLoader, RandomSampler
 from transformers import DataCollator, PreTrainedModel, default_data_collator
 from transformers.pytorch_utils import Conv1D
 
 from optimum.exporters.tasks import TasksManager
 from optimum.quantization_base import OptimumQuantizer
 
+from ...exporters.openvino import export
 from ..utils.constant import _TASK_ALIASES
 from ..utils.modeling_utils import patch_decoder_attention_mask
 from .configuration import OVConfig
@@ -353,7 +354,7 @@ def _quantize_torchmodel(
                 "No configuration describing the quantization process was provided, a default OVConfig will be generated."
             )
             quantization_config = OVConfig()
-
+
         if weights_only:
             compressed_model = compress_weights(self.model)
             self.model = compressed_model
@@ -377,19 +378,18 @@ def _quantize_torchmodel(
         task = self.task
         model = self.model
         self.model.config.save_pretrained(save_directory)
-
+        model = patch_decoder_attention_mask(model)
         if task == "text-generation":
-            model = patch_decoder_attention_mask(model)
             onnx_config = onnx_config_class(model.config, use_past=model.config.use_cache)
         else:
             onnx_config = onnx_config_class(model.config)
 
-        model_path = save_directory / onnx_file_name if quantization_config.save_onnx_model else ov_file_name
+        model_path = save_directory / (onnx_file_name if quantization_config.save_onnx_model else ov_file_name)
         onnx_path = save_directory / onnx_file_name
         opset = min(onnx_config.DEFAULT_ONNX_OPSET, MAX_ONNX_OPSET)
         opset = max(opset, MIN_ONNX_QDQ_OPSET)
         _, _, is_onnx = export(
-            model=compressed_model,
+            model=model,
             config=onnx_config,
             output=model_path,
             opset=opset,
@@ -399,17 +399,8 @@ def _quantize_torchmodel(
             # Load and save the compressed model
             model = core.read_model(onnx_path)
             self._save_pretrained(model, output_path)
-<<<<<<< HEAD
-        else:
-            _, _, is_onnx = export(model=compressed_model, config=onnx_config, output=output_path)
-            if is_onnx:
-                onnx_path = output_path.replace(".xml", ".onnx")
-                model = core.read_model(onnx_path)
-                self._save_pretrained(model, output_path)
-=======
             # if onnx conversion happens as fallback for pytorch conversion, remove onnx model
             if not quantization_config.save_onnx_model:
->>>>>>> fix llama export in quantization flow
                 os.remove(onnx_path)
                 try:
                     os.remove(f"{onnx_path}_data")
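
For reference, a minimal sketch of how the fixed path would typically be exercised end to end. The `OVQuantizer.from_pretrained` / `quantize(..., weights_only=True)` entry points and the placeholder model path are assumptions here, not part of this diff; what the patch itself changes is the `export` call, the `patch_decoder_attention_mask` placement, and the ONNX cleanup branch.

```python
# Minimal sketch (assumed public entry points, placeholder model path — not shown in this diff):
# weight-only compression of a decoder model, exported to OpenVINO IR through _quantize_torchmodel.
from optimum.intel import OVQuantizer
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("path/to/llama-checkpoint")  # placeholder path
quantizer = OVQuantizer.from_pretrained(model, task="text-generation")

# weights_only=True takes the compress_weights() branch shown above, so no calibration dataset is needed.
quantizer.quantize(save_directory="llama-ov", weights_only=True)
```

When `quantization_config.save_onnx_model` is false, the intermediate ONNX file produced by the fallback export is removed once the IR has been written; that is the branch the last hunk untangles.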