From fae2541ca2234ff00212895f50886556f5e67cb4 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 8 Jun 2023 14:37:27 +0000 Subject: [PATCH 1/2] Fix loading --- src/diffusers/models/modeling_utils.py | 15 +++++++++++++++ .../unidiffuser/modeling_text_decoder.py | 1 + 2 files changed, 16 insertions(+) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index c9fabf93253b..135a79adfb61 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -17,6 +17,7 @@ import inspect import itertools import os +import re from functools import partial from typing import Any, Callable, List, Optional, Tuple, Union @@ -162,6 +163,7 @@ class ModelMixin(torch.nn.Module): config_name = CONFIG_NAME _automatically_saved_args = ["_diffusers_version", "_class_name", "_name_or_path"] _supports_gradient_checkpointing = False + _keys_to_ignore_on_load_unexpected = None def __init__(self): super().__init__() @@ -608,6 +610,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P " `low_cpu_mem_usage=False` and `device_map=None` if you want to randomly initialize" " those weights or else make sure your checkpoint file is correct." ) + unexpected_keys = [] empty_state_dict = model.state_dict() for param_name, param in state_dict.items(): @@ -615,6 +618,10 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P inspect.signature(set_module_tensor_to_device).parameters.keys() ) + if param_name not in empty_state_dict: + unexpected_keys.append(param_name) + continue + if empty_state_dict[param_name].shape != param.shape: raise ValueError( f"Cannot load {pretrained_model_name_or_path} because {param_name} expected shape {empty_state_dict[param_name]}, but got {param.shape}. If you want to instead overwrite randomly initialized weights, please make sure to pass both `low_cpu_mem_usage=False` and `ignore_mismatched_sizes=True`. For more information, see also: https://github.com/huggingface/diffusers/issues/1619#issuecomment-1345604389 as an example." @@ -626,6 +633,14 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P ) else: set_module_tensor_to_device(model, param_name, param_device, value=param) + + if cls._keys_to_ignore_on_load_unexpected is not None: + for pat in cls._keys_to_ignore_on_load_unexpected: + unexpected_keys = [k for k in unexpected_keys if re.search(pat, k) is None] + + if len(unexpected_keys) > 0: + logger.warn(f"Some weights of the model checkpoint were not used when initializing {cls.__name__}: \n {[', '.join(unexpected_keys)]}") + else: # else let accelerate handle loading and dispatching. # Load weights and dispatch according to the device_map # by default the device_map is None and the weights are loaded on the CPU diff --git a/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py b/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py index febc8e09e6ab..9dfce5d40028 100644 --- a/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py +++ b/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py @@ -60,6 +60,7 @@ class UniDiffuserTextDecoder(ModelMixin, ConfigMixin, ModuleUtilsMixin): Whether to scale keys (K) prior to computing attention (dot-product) and upcast attention dot-product/softmax to float() when training with mixed precision. """ + _keys_to_ignore_on_load_unexpected = [r"h\.\d+\.attn\.bias", r"h\.\d+\.attn\.masked_bias"] @register_to_config def __init__( From 3fad8036164c19e6fb68cbe6119026ea3d215659 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 8 Jun 2023 14:39:04 +0000 Subject: [PATCH 2/2] make style --- src/diffusers/models/modeling_utils.py | 4 +++- src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py index 135a79adfb61..f6d6bc5711cd 100644 --- a/src/diffusers/models/modeling_utils.py +++ b/src/diffusers/models/modeling_utils.py @@ -639,7 +639,9 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P unexpected_keys = [k for k in unexpected_keys if re.search(pat, k) is None] if len(unexpected_keys) > 0: - logger.warn(f"Some weights of the model checkpoint were not used when initializing {cls.__name__}: \n {[', '.join(unexpected_keys)]}") + logger.warn( + f"Some weights of the model checkpoint were not used when initializing {cls.__name__}: \n {[', '.join(unexpected_keys)]}" + ) else: # else let accelerate handle loading and dispatching. # Load weights and dispatch according to the device_map diff --git a/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py b/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py index 9dfce5d40028..9b962f6e0656 100644 --- a/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py +++ b/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py @@ -60,6 +60,7 @@ class UniDiffuserTextDecoder(ModelMixin, ConfigMixin, ModuleUtilsMixin): Whether to scale keys (K) prior to computing attention (dot-product) and upcast attention dot-product/softmax to float() when training with mixed precision. """ + _keys_to_ignore_on_load_unexpected = [r"h\.\d+\.attn\.bias", r"h\.\d+\.attn\.masked_bias"] @register_to_config