diff --git a/optimum/intel/openvino/modeling.py b/optimum/intel/openvino/modeling.py index f9a0b45ace..9acd6f7cd0 100644 --- a/optimum/intel/openvino/modeling.py +++ b/optimum/intel/openvino/modeling.py @@ -48,8 +48,7 @@ XVectorOutput, ) -from optimum.exporters import TasksManager -from optimum.exporters.onnx import export +from optimum.exporters import TasksManager, onnx from optimum.modeling_base import OptimizedModel from .modeling_base import OVBaseModel @@ -523,17 +522,14 @@ def from_pretrained( ): # Fix the mismatch between timm_config and huggingface_config if not os.path.isdir(model_id) and model_info(model_id).library_name == "timm": - return OVModelForTimm.from_timm( - model_id=model_id, - use_auth_token=use_auth_token, - revision=revision, - force_download=force_download, - cache_dir=cache_dir, - subfolder=subfolder, - local_files_only=local_files_only, - task=task, - trust_remote_code=trust_remote_code, - **kwargs, + config = TimmConfig.from_pretrained(model_id, **kwargs) + model = TimmForImageClassification.from_pretrained(model_id, **kwargs) + onnx_config = TimmOnnxConfig(model.config) + + return cls._to_onnx_to_load( + model=model, + config=config, + onnx_config=onnx_config, ) else: return super().from_pretrained( @@ -580,167 +576,6 @@ def forward( return ImageClassifierOutput(logits=logits) -class OVModelForTimm(OVModel): - @classmethod - def _from_transformers( - cls, - model_id: str, - config: PretrainedConfig, - use_auth_token: Optional[Union[bool, str]] = None, - revision: Optional[str] = None, - force_download: bool = False, - cache_dir: Optional[str] = None, - subfolder: str = "", - local_files_only: bool = False, - task: Optional[str] = None, - trust_remote_code: bool = False, - **kwargs, - ): - task = task or cls.export_feature - - model = TimmForImageClassification.from_pretrained(model_id, **kwargs) - onnx_config = TimmOnnxConfig(model.config) - save_dir = TemporaryDirectory() - - with TemporaryDirectory() as save_dir: - save_dir_path = Path(save_dir) - export( - model=model, - config=onnx_config, - opset=onnx_config.DEFAULT_TIMM_ONNX_OPSET, - output=save_dir_path / ONNX_WEIGHTS_NAME, - ) - - return cls._from_pretrained( - model_id=save_dir_path, - config=config, - from_onnx=True, - use_auth_token=use_auth_token, - revision=revision, - force_download=force_download, - cache_dir=cache_dir, - local_files_only=local_files_only, - **kwargs, - ) - - @classmethod - def from_timm( - cls, - model_id: Union[str, Path], - export: bool = False, - force_download: bool = False, - use_auth_token: Optional[str] = None, - cache_dir: Optional[str] = None, - subfolder: str = "", - config: Optional["PretrainedConfig"] = None, - local_files_only: bool = False, - trust_remote_code: bool = False, - revision: Optional[str] = None, - **kwargs, - ) -> "OptimizedModel": - """ - Returns: - `OptimizedModel`: The loaded optimized model. - """ - if isinstance(model_id, Path): - model_id = model_id.as_posix() - - from_transformers = kwargs.pop("from_transformers", None) - if from_transformers is not None: - logger.warning( - "The argument `from_transformers` is deprecated, and will be removed in optimum 2.0. Use `export` instead" - ) - export = from_transformers - - if len(model_id.split("@")) == 2: - if revision is not None: - logger.warning( - f"The argument `revision` was set to {revision} but will be ignored for {model_id.split('@')[1]}" - ) - model_id, revision = model_id.split("@") - - # if config is None: - # if os.path.isdir(os.path.join(model_id, subfolder)) and cls.config_name == CONFIG_NAME: - # if CONFIG_NAME in os.listdir(os.path.join(model_id, subfolder)): - # config = AutoConfig.from_pretrained( - # os.path.join(model_id, subfolder, CONFIG_NAME), trust_remote_code=trust_remote_code - # ) - # elif CONFIG_NAME in os.listdir(model_id): - # config = AutoConfig.from_pretrained( - # os.path.join(model_id, CONFIG_NAME), trust_remote_code=trust_remote_code - # ) - # logger.info( - # f"config.json not found in the specified subfolder {subfolder}. Using the top level config.json." - # ) - # else: - # raise OSError(f"config.json not found in {model_id} local folder") - # else: - config = cls._load_config( - model_id, - revision=revision, - cache_dir=cache_dir, - use_auth_token=use_auth_token, - force_download=force_download, - subfolder=subfolder, - trust_remote_code=trust_remote_code, - ) - # elif isinstance(config, (str, os.PathLike)): - # config = cls._load_config( - # config, - # revision=revision, - # cache_dir=cache_dir, - # use_auth_token=use_auth_token, - # force_download=force_download, - # subfolder=subfolder, - # trust_remote_code=trust_remote_code, - # ) - - if not export and trust_remote_code: - logger.warning( - "The argument `trust_remote_code` is to be used along with export=True. It will be ignored." - ) - elif export and trust_remote_code is None: - trust_remote_code = False - - # from_pretrained_method = cls._from_transformers if export else cls._from_pretrained - return cls._from_transformers( - model_id=model_id, - config=config, - revision=revision, - cache_dir=cache_dir, - force_download=force_download, - use_auth_token=use_auth_token, - subfolder=subfolder, - local_files_only=local_files_only, - trust_remote_code=trust_remote_code, - **kwargs, - ) - - def forward( - self, - pixel_values: Union[torch.Tensor, np.ndarray], - **kwargs, - ): - self.compile() - - np_inputs = isinstance(pixel_values, np.ndarray) - if not np_inputs: - pixel_values = np.array(pixel_values) - - inputs = { - "pixel_values": pixel_values, - } - - # Run inference - outputs = self.request(inputs) - logits = torch.from_numpy(outputs["logits"]).to(self.device) if not np_inputs else outputs["logits"] - return ImageClassifierOutput(logits=logits) - - @classmethod - def _load_config(cls, model_id, **kwargs): - return TimmConfig.from_pretrained(model_id, **kwargs) - - AUDIO_CLASSIFICATION_EXAMPLE = r""" Example of audio classification using `transformers.pipelines`: ```python diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index a6087ff952..1008a1ab9a 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -25,7 +25,7 @@ from transformers import PretrainedConfig from transformers.file_utils import add_start_docstrings -from optimum.exporters.onnx import export +from optimum.exporters.onnx import export, OnnxConfig from optimum.exporters.tasks import TasksManager from optimum.modeling_base import OptimizedModel @@ -276,6 +276,31 @@ def _from_transformers( ) onnx_config = onnx_config_class(model.config) + + return cls._to_onnx_to_load( + model=model, + config=config, + onnx_config=onnx_config, + use_auth_token=use_auth_token, + revision=revision, + force_download=force_download, + cache_dir=cache_dir, + local_files_only=local_files_only, + ) + + @classmethod + def _to_onnx_to_load( + cls, + model: PreTrainedModel, + config: PretrainedConfig, + onnx_config:OnnxConfig, + use_auth_token: Optional[Union[bool, str]] = None, + revision: Optional[str] = None, + force_download: bool = False, + cache_dir: Optional[str] = None, + local_files_only: bool = False, + **kwargs, + ): save_dir = TemporaryDirectory() save_dir_path = Path(save_dir.name) diff --git a/optimum/intel/openvino/modeling_timm.py b/optimum/intel/openvino/modeling_timm.py index 5bb80de0e3..43c629422f 100644 --- a/optimum/intel/openvino/modeling_timm.py +++ b/optimum/intel/openvino/modeling_timm.py @@ -62,67 +62,19 @@ class TimmOnnxConfig(ViTOnnxConfig): outputs = OrderedDict([("logits", {0: "batch_size"})]) -class TimmPreTrainedModel(PreTrainedModel): - config_class = TimmConfig - base_model_prefix = "timm" - main_input_name = "pixel_values" - -class TimmModel(TimmPreTrainedModel): - def __init__( - self, config: TimmConfig, feature_only: bool = True, pretrained: bool = True, in_chans: int = 3, **kwargs - ): - super().__init__(config) - - self.config = config - if feature_only: - self.timm_model = timm.create_model( - "hf-hub:" + self.config.hf_hub_id, num_classes=0, pretrained=pretrained, in_chans=in_chans - ) - else: - self.timm_model = timm.create_model( - "hf-hub:" + self.config.hf_hub_id, - num_classes=self.config.num_labels, - pretrained=pretrained, - in_chans=in_chans, - ) - self.timm_model.eval() - - @classmethod - def from_pretrained(cls, model_name_or_path, **kwargs): - config = TimmConfig.from_pretrained(model_name_or_path, **kwargs) - return cls(config, **kwargs) - - def forward( - self, - pixel_values: Optional[torch.Tensor] = None, - return_dict: Optional[bool] = None, - ) -> Union[Tuple, BaseModelOutput]: - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - - if pixel_values is None: - raise ValueError("You have to specify pixel_values") - - # TODO: maybe have a cleaner way to cast the input (from `ImageProcessor` side?) - # expected_dtype = self.embeddings.patch_embeddings.projection.weight.dtype - # if pixel_values.dtype != expected_dtype: - # pixel_values = pixel_values.to(expected_dtype) - - model_output = self.timm_model(pixel_values) - - if not return_dict: - return model_output - - return BaseModelOutput(last_hidden_state=model_output, hidden_states=None) - - -class TimmForImageClassification(TimmPreTrainedModel): +class TimmForImageClassification(PreTrainedModel): def __init__(self, config: TimmConfig, num_labels: int = None, **kwargs) -> None: super().__init__(config, **kwargs) - if num_labels: config.num_labels = num_labels - self.timm = TimmModel(config, feature_only=False) + self.model = timm.create_model( + "hf-hub:" + self.config.hf_hub_id, + num_classes=self.config.num_labels, + pretrained=True, + in_chans=3, + ) + self.model.eval() @classmethod def from_pretrained(cls, model_name_or_path, **kwargs): @@ -131,54 +83,14 @@ def from_pretrained(cls, model_name_or_path, **kwargs): def forward( self, - pixel_values: Optional[torch.Tensor] = None, - labels: Optional[torch.Tensor] = None, - return_dict: Optional[bool] = None, - ) -> Union[tuple, ImageClassifierOutput]: - r""" - labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): - Labels for computing the image classification/regression loss. Indices should be in `[0, ..., - config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If - `config.num_labels > 1` a classification loss is computed (Cross-Entropy). - """ - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - - logits = self.timm( + pixel_values: Optional[torch.Tensor] = None + ): + logits = self.model( pixel_values, - return_dict=return_dict, ) - loss = None - if labels is not None: - # move labels to correct device to enable model parallelism - labels = labels.to(logits.device) - if self.config.problem_type is None: - if self.num_labels == 1: - self.config.problem_type = "regression" - elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int): - self.config.problem_type = "single_label_classification" - else: - self.config.problem_type = "multi_label_classification" - - if self.config.problem_type == "regression": - loss_fct = MSELoss() - if self.num_labels == 1: - loss = loss_fct(logits.squeeze(), labels.squeeze()) - else: - loss = loss_fct(logits, labels) - elif self.config.problem_type == "single_label_classification": - loss_fct = CrossEntropyLoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) - elif self.config.problem_type == "multi_label_classification": - loss_fct = BCEWithLogitsLoss() - loss = loss_fct(logits, labels) - - if not return_dict: - return logits - return ImageClassifierOutput( - loss=loss, - logits=logits.last_hidden_state, + logits=logits, ) @@ -392,7 +304,5 @@ def preprocess( images = [self.normalize(image=image, mean=image_mean, std=image_std) for image in images] images = [to_channel_dimension_format(image, data_format) for image in images] - # print(images[0].shape) - # print('top from preprocessor') data = {"pixel_values": images} return BatchFeature(data=data, tensor_type=return_tensors)