Commit b8999fb

rebase with fixes

1 parent acd61af commit b8999fb

9 files changed, 21 additions and 703 deletions
optimum/exporters/openvino/__init__.py

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+from .__main__ import main_export
+from .convert import export, export_models
+
+
+__all__ = ["main_export", "export", "export_models"]
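
These re-exports define the new public entry points that the rest of this commit migrates to. A minimal sketch of the resulting import surface; the main_export call is illustrative only, on the assumption that it mirrors optimum.exporters.onnx.main_export:

# Hypothetical usage of the re-exported entry points; only the names in
# __all__ above are confirmed by this commit, the call is assumed.
from optimum.exporters.openvino import export, export_models, main_export

# Assumption: main_export takes a Hub model id plus an output directory
# for the exported OpenVINO IR, like its ONNX counterpart.
main_export("gpt2", output="ov_model")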

optimum/exporters/openvino/convert.py

Lines changed: 0 additions & 1 deletion

@@ -101,7 +101,6 @@ def export(
         device=device,
         input_shapes=input_shapes,
         model_kwargs=model_kwargs,
-        opset=opset,
         from_onnx=from_onnx,
     )

optimum/intel/openvino/export.py

Lines changed: 0 additions & 673 deletions
This file was deleted.

optimum/intel/openvino/modeling_base.py

Lines changed: 5 additions & 6 deletions

@@ -28,8 +28,9 @@
 from optimum.exporters import TasksManager
 from optimum.modeling_base import OptimizedModel

+from ...exporters.openvino import export
+from ...exporters.openvino.utils import is_torch_model
 from ..utils.import_utils import is_transformers_version
-from .export import export, is_torch_model
 from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME


@@ -128,10 +129,7 @@ def fix_op_names_duplicates(model: openvino.runtime.Model):

         if isinstance(file_name, str):
             file_name = Path(file_name)
-        bin_file_name = file_name.with_suffix(".bin") if file_name.suffix == ".xml" else None
-        model = (
-            core.read_model(file_name, bin_file_name) if not file_name.suffix == ".onnx" else convert_model(file_name)
-        )
+        model = core.read_model(file_name) if not file_name.suffix == ".onnx" else convert_model(file_name)
         if file_name.suffix == ".onnx":
             model = fix_op_names_duplicates(model)  # should be called during model conversion to IR

@@ -198,8 +196,9 @@ def _from_pretrained(
             model_save_dir = model_id
         # Download the model from the hub
         else:
-            model_file_names = [file_name] if from_onnx else []
+            model_file_names = [file_name]
             # If not ONNX then OpenVINO IR
+
             if not from_onnx:
                 model_file_names.append(file_name.replace(".xml", ".bin"))
             file_names = []
optimum/intel/openvino/modeling_base_seq2seq.py

Lines changed: 1 addition & 1 deletion

@@ -27,8 +27,8 @@
 from optimum.exporters import TasksManager
 from optimum.exporters.onnx import get_encoder_decoder_models_for_export

+from ...exporters.openvino import export_models
 from ..utils.import_utils import is_transformers_version
-from .export import export_models
 from .modeling_base import OVBaseModel
 from .utils import (
     ONNX_DECODER_NAME,

optimum/intel/openvino/modeling_decoder.py

Lines changed: 2 additions & 5 deletions

@@ -30,13 +30,10 @@
 from optimum.exporters import TasksManager
 from optimum.utils import NormalizedConfigManager

+from ...exporters.openvino import export
+from ...exporters.openvino.utils import is_torch_model
 from ..utils.import_utils import is_transformers_version
-<<<<<<< HEAD
-from ..utils.modeling_utils import _prepare_attn_mask, _prepare_decoder_attention_mask
-from .export import export, is_torch_model
-=======
 from ..utils.modeling_utils import patch_decoder_attention_mask
->>>>>>> fix llama export in quantization flow
 from .modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel
 from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, STR_TO_OV_TYPE

optimum/intel/openvino/modeling_diffusion.py

Lines changed: 1 addition & 1 deletion

@@ -49,8 +49,8 @@
     DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER,
 )

+from ...exporters.openvino import main_export
 from .loaders import OVTextualInversionLoaderMixin
-from .export import main_export
 from .modeling_base import OVBaseModel
 from .utils import ONNX_WEIGHTS_NAME, OV_TO_NP_TYPE, OV_XML_FILE_NAME

optimum/intel/openvino/quantization.py

Lines changed: 6 additions & 15 deletions

@@ -30,13 +30,14 @@
 from nncf.torch.initialization import PTInitializingDataLoader
 from openvino._offline_transformations import compress_quantize_weights_transformation
 from openvino.runtime import Core, Tensor
-from torch.utils.data import DataLoader, RandomSampler, TensorDataset
+from torch.utils.data import DataLoader, RandomSampler
 from transformers import DataCollator, PreTrainedModel, default_data_collator
 from transformers.pytorch_utils import Conv1D

 from optimum.exporters.tasks import TasksManager
 from optimum.quantization_base import OptimumQuantizer

+from ...exporters.openvino import export
 from ..utils.constant import _TASK_ALIASES
 from ..utils.modeling_utils import patch_decoder_attention_mask
 from .configuration import OVConfig

@@ -353,7 +354,7 @@ def _quantize_torchmodel(
                 "No configuration describing the quantization process was provided, a default OVConfig will be generated."
             )
             quantization_config = OVConfig()
-
+
         if weights_only:
             compressed_model = compress_weights(self.model)
             self.model = compressed_model

@@ -377,19 +378,18 @@ def _quantize_torchmodel(
             task = self.task
             model = self.model
             self.model.config.save_pretrained(save_directory)
-
+            model = patch_decoder_attention_mask(model)
             if task == "text-generation":
-                model = patch_decoder_attention_mask(model)
                 onnx_config = onnx_config_class(model.config, use_past=model.config.use_cache)
             else:
                 onnx_config = onnx_config_class(model.config)

-            model_path = save_directory / onnx_file_name if quantization_config.save_onnx_model else ov_file_name
+            model_path = save_directory / (onnx_file_name if quantization_config.save_onnx_model else ov_file_name)
             onnx_path = save_directory / onnx_file_name
             opset = min(onnx_config.DEFAULT_ONNX_OPSET, MAX_ONNX_OPSET)
             opset = max(opset, MIN_ONNX_QDQ_OPSET)
             _, _, is_onnx = export(
-                model=compressed_model,
+                model=model,
                 config=onnx_config,
                 output=model_path,
                 opset=opset,

@@ -399,17 +399,8 @@ def _quantize_torchmodel(
             # Load and save the compressed model
             model = core.read_model(onnx_path)
             self._save_pretrained(model, output_path)
-<<<<<<< HEAD
-        else:
-            _, _, is_onnx = export(model=compressed_model, config=onnx_config, output=output_path)
-            if is_onnx:
-                onnx_path = output_path.replace(".xml", ".onnx")
-                model = core.read_model(onnx_path)
-                self._save_pretrained(model, output_path)
-=======
             # if onnx conversion happens as fallback for pytorch conversion, remove onnx model
             if not quantization_config.save_onnx_model:
->>>>>>> fix llama export in quantization flow
                 os.remove(onnx_path)
                 try:
                     os.remove(f"{onnx_path}_data")

setup.py

Lines changed: 1 addition & 1 deletion

@@ -41,7 +41,7 @@
         "onnxruntime<1.15.0",
     ],
     "openvino": ["openvino==2023.1.0.dev20230728", "onnx", "onnxruntime"],
-    "nncf": ["git+https://github.com/openvinotoolkit/nncf.git"],
+    "nncf": ["nncf @ git+https://github.com/openvinotoolkit/nncf.git"],
     "ipex": ["intel-extension-for-pytorch", "onnx"],
     "diffusers": ["diffusers", "invisible-watermark>=0.2.0"],
     "quality": QUALITY_REQUIRE,
