From fae2541ca2234ff00212895f50886556f5e67cb4 Mon Sep 17 00:00:00 2001
From: Patrick von Platen <patrick.v.platen@gmail.com>
Date: Thu, 8 Jun 2023 14:37:27 +0000
Subject: [PATCH 1/2] Fix loading: skip and report unexpected checkpoint keys

---
 src/diffusers/models/modeling_utils.py            | 15 +++++++++++++++
 .../unidiffuser/modeling_text_decoder.py          |  1 +
 2 files changed, 16 insertions(+)

diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py
index c9fabf93253b..135a79adfb61 100644
--- a/src/diffusers/models/modeling_utils.py
+++ b/src/diffusers/models/modeling_utils.py
@@ -17,6 +17,7 @@
 import inspect
 import itertools
 import os
+import re
 from functools import partial
 from typing import Any, Callable, List, Optional, Tuple, Union
 
@@ -162,6 +163,7 @@ class ModelMixin(torch.nn.Module):
     config_name = CONFIG_NAME
     _automatically_saved_args = ["_diffusers_version", "_class_name", "_name_or_path"]
     _supports_gradient_checkpointing = False
+    _keys_to_ignore_on_load_unexpected = None
 
     def __init__(self):
         super().__init__()
@@ -608,6 +610,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                             " `low_cpu_mem_usage=False` and `device_map=None` if you want to randomly initialize"
                             " those weights or else make sure your checkpoint file is correct."
                         )
+                    unexpected_keys = []
 
                     empty_state_dict = model.state_dict()
                     for param_name, param in state_dict.items():
@@ -615,6 +618,10 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                             inspect.signature(set_module_tensor_to_device).parameters.keys()
                         )
 
+                        if param_name not in empty_state_dict:
+                            unexpected_keys.append(param_name)
+                            continue
+
                         if empty_state_dict[param_name].shape != param.shape:
                             raise ValueError(
                                 f"Cannot load {pretrained_model_name_or_path} because {param_name} expected shape {empty_state_dict[param_name]}, but got {param.shape}. If you want to instead overwrite randomly initialized weights, please make sure to pass both `low_cpu_mem_usage=False` and `ignore_mismatched_sizes=True`. For more information, see also: https://github.com/huggingface/diffusers/issues/1619#issuecomment-1345604389 as an example."
@@ -626,6 +633,14 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                             )
                         else:
                             set_module_tensor_to_device(model, param_name, param_device, value=param)
+
+                    if cls._keys_to_ignore_on_load_unexpected is not None:
+                        for pat in cls._keys_to_ignore_on_load_unexpected:
+                            unexpected_keys = [k for k in unexpected_keys if re.search(pat, k) is None]
+
+                    if len(unexpected_keys) > 0:
+                        logger.warn(f"Some weights of the model checkpoint were not used when initializing {cls.__name__}: \n {[', '.join(unexpected_keys)]}")
+
                 else:  # else let accelerate handle loading and dispatching.
                     # Load weights and dispatch according to the device_map
                     # by default the device_map is None and the weights are loaded on the CPU
diff --git a/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py b/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py
index febc8e09e6ab..9dfce5d40028 100644
--- a/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py
+++ b/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py
@@ -60,6 +60,7 @@ class UniDiffuserTextDecoder(ModelMixin, ConfigMixin, ModuleUtilsMixin):
             Whether to scale keys (K) prior to computing attention (dot-product) and upcast attention
             dot-product/softmax to float() when training with mixed precision.
     """
+    _keys_to_ignore_on_load_unexpected = [r"h\.\d+\.attn\.bias", r"h\.\d+\.attn\.masked_bias"]
 
     @register_to_config
     def __init__(

From 3fad8036164c19e6fb68cbe6119026ea3d215659 Mon Sep 17 00:00:00 2001
From: Patrick von Platen <patrick.v.platen@gmail.com>
Date: Thu, 8 Jun 2023 14:39:04 +0000
Subject: [PATCH 2/2] make style

---
 src/diffusers/models/modeling_utils.py                       | 4 +++-
 src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/diffusers/models/modeling_utils.py b/src/diffusers/models/modeling_utils.py
index 135a79adfb61..f6d6bc5711cd 100644
--- a/src/diffusers/models/modeling_utils.py
+++ b/src/diffusers/models/modeling_utils.py
@@ -639,7 +639,9 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                             unexpected_keys = [k for k in unexpected_keys if re.search(pat, k) is None]
 
                     if len(unexpected_keys) > 0:
-                        logger.warn(f"Some weights of the model checkpoint were not used when initializing {cls.__name__}: \n {[', '.join(unexpected_keys)]}")
+                        logger.warn(
+                            f"Some weights of the model checkpoint were not used when initializing {cls.__name__}: \n {[', '.join(unexpected_keys)]}"
+                        )
 
                 else:  # else let accelerate handle loading and dispatching.
                     # Load weights and dispatch according to the device_map
diff --git a/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py b/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py
index 9dfce5d40028..9b962f6e0656 100644
--- a/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py
+++ b/src/diffusers/pipelines/unidiffuser/modeling_text_decoder.py
@@ -60,6 +60,7 @@ class UniDiffuserTextDecoder(ModelMixin, ConfigMixin, ModuleUtilsMixin):
             Whether to scale keys (K) prior to computing attention (dot-product) and upcast attention
             dot-product/softmax to float() when training with mixed precision.
     """
+
     _keys_to_ignore_on_load_unexpected = [r"h\.\d+\.attn\.bias", r"h\.\d+\.attn\.masked_bias"]
 
     @register_to_config