26 changes: 13 additions & 13 deletions examples/research_projects/movement-pruning/Saving_PruneBERT.ipynb
@@ -41,7 +41,7 @@
"from scipy import sparse\n",
"from torch import nn\n",
"\n",
"from transformers import *\n",
"from transformers import BertForQuestionAnswering\n",
"\n",
"\n",
"os.chdir(\"../../\")"
@@ -307,7 +307,7 @@
" print(f\"Skip {name}\")\n",
" continue\n",
"\n",
" if type(param) == torch.Tensor:\n",
" if isinstance(param, torch.Tensor):\n",
" if param.numel() == 1:\n",
" # module scale\n",
" # module zero_point\n",
@@ -319,13 +319,13 @@
" param = param.detach().numpy()\n",
" hf.create_dataset(name, data=param, compression=\"gzip\", compression_opts=9)\n",
"\n",
" elif type(param) == float or type(param) == int or type(param) == tuple:\n",
" elif isinstance(param, (float, int, tuple)):\n",
" # float - tensor _packed_params.weight.scale\n",
" # int - tensor _packed_params.weight.zero_point\n",
" # tuple - tensor _packed_params.weight.shape\n",
" hf.attrs[name] = param\n",
"\n",
" elif type(param) == torch.dtype:\n",
" elif isinstance(param, torch.dtype):\n",
" # dtype - tensor _packed_params.dtype\n",
" hf.attrs[name] = dtype_2_str[param]\n",
"\n",
@@ -370,7 +370,7 @@
" # print(f\"Skip {name}\")\n",
" # continue\n",
"\n",
" if type(param) == torch.Tensor:\n",
" if isinstance(param, torch.Tensor):\n",
" if param.numel() == 1:\n",
" # module scale\n",
" # module zero_point\n",
@@ -382,13 +382,13 @@
" param = param.detach().numpy()\n",
" hf.create_dataset(name, data=param, compression=\"gzip\", compression_opts=9)\n",
"\n",
" elif type(param) == float or type(param) == int or type(param) == tuple:\n",
" elif isinstance(param, (float, int, tuple)):\n",
" # float - tensor _packed_params.weight.scale\n",
" # int - tensor _packed_params.weight.zero_point\n",
" # tuple - tensor _packed_params.weight.shape\n",
" hf.attrs[name] = param\n",
"\n",
" elif type(param) == torch.dtype:\n",
" elif isinstance(param, torch.dtype):\n",
" # dtype - tensor _packed_params.dtype\n",
" hf.attrs[name] = dtype_2_str[param]\n",
"\n",
@@ -471,10 +471,10 @@
" assert name in reconstructed_elementary_qtz_st, name\n",
"\n",
"for name, param in reconstructed_elementary_qtz_st.items():\n",
" assert type(param) == type(elementary_qtz_st[name]), name\n",
" if type(param) == torch.Tensor:\n",
" assert isinstance(param, type(elementary_qtz_st[name])), name\n",
" if isinstance(param, torch.Tensor):\n",
" assert torch.all(torch.eq(param, elementary_qtz_st[name])), name\n",
" elif type(param) == np.ndarray:\n",
" elif isinstance(param, np.ndarray):\n",
" assert (param == elementary_qtz_st[name]).all(), name\n",
" else:\n",
" assert param == elementary_qtz_st[name], name"
@@ -532,10 +532,10 @@
" assert name in reconstructed_qtz_st, name\n",
"\n",
"for name, param in reconstructed_qtz_st.items():\n",
" assert type(param) == type(qtz_st[name]), name\n",
" if type(param) == torch.Tensor:\n",
" assert isinstance(param, type(qtz_st[name])), name\n",
" if isinstance(param, torch.Tensor):\n",
" assert torch.all(torch.eq(param, qtz_st[name])), name\n",
" elif type(param) == np.ndarray:\n",
" elif isinstance(param, np.ndarray):\n",
" assert (param == qtz_st[name]).all(), name\n",
" else:\n",
" assert param == qtz_st[name], name"
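Note on the isinstance changes above: a minimal sketch of why `isinstance` is preferred over an exact `type(...) ==` comparison — it accepts subclasses. The `nn.Parameter` example is illustrative and not taken from the notebook.

```python
import torch
from torch import nn

# nn.Parameter subclasses torch.Tensor, so an exact type comparison misses it.
param = nn.Parameter(torch.zeros(3))

print(type(param) == torch.Tensor)      # False: the exact type is nn.Parameter
print(isinstance(param, torch.Tensor))  # True: isinstance accepts subclasses
```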
@@ -114,7 +114,7 @@ def __init__(self, config: ASTConfig) -> None:
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

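Note on the f-string fix above (repeated in the modeling files below): the stray comma inside the placeholder builds a one-element tuple, so the error message rendered as "(768,)" instead of "768". A minimal sketch, assuming hidden_size = 768 purely for illustration:

```python
hidden_size = 768  # illustrative value, not taken from any real config

# With the stray comma the placeholder evaluates `hidden_size,`, i.e. a tuple.
print(f"The hidden size {hidden_size,} is not a multiple of the number of attention heads")
# -> The hidden size (768,) is not a multiple of the number of attention heads

# Without the comma the integer is formatted as expected.
print(f"The hidden size {hidden_size} is not a multiple of the number of attention heads")
# -> The hidden size 768 is not a multiple of the number of attention heads
```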
2 changes: 1 addition & 1 deletion src/transformers/models/beit/modeling_beit.py
@@ -270,7 +270,7 @@ def __init__(self, config: BeitConfig, window_size: Optional[tuple] = None) -> N
self.config = config
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {(config.hidden_size,)} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/beit/modeling_flax_beit.py
@@ -271,7 +271,7 @@ def setup(self):
self.config, "embedding_size"
):
raise ValueError(
f"The hidden size {self.config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {self.config.hidden_size} is not a multiple of the number of attention "
f"heads {self.config.num_attention_heads}."
)

@@ -271,7 +271,7 @@ def __init__(self, config: Data2VecVisionConfig, window_size: Optional[tuple] =
self.config = config
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {(config.hidden_size,)} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/deit/modeling_deit.py
@@ -186,7 +186,7 @@ def __init__(self, config: DeiTConfig) -> None:
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/deprecated/tvlt/modeling_tvlt.py
@@ -345,7 +345,7 @@ def __init__(self, config):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

@@ -204,7 +204,7 @@ def __init__(self, config: ViTHybridConfig) -> None:
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/dinov2/modeling_dinov2.py
@@ -178,7 +178,7 @@ def __init__(self, config: Dinov2Config) -> None:
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

@@ -190,7 +190,7 @@ def __init__(self, config: Dinov2WithRegistersConfig) -> None:
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/dpt/modeling_dpt.py
@@ -301,7 +301,7 @@ def __init__(self, config: DPTConfig) -> None:
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/flava/modeling_flava.py
@@ -438,7 +438,7 @@ def __init__(self, config: FlavaPossibleConfigs) -> None:
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/ijepa/modeling_ijepa.py
@@ -194,7 +194,7 @@ def __init__(self, config: IJepaConfig) -> None:
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/luke/modeling_luke.py
@@ -501,7 +501,7 @@ def __init__(self, config):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/mobilevit/modeling_mobilevit.py
@@ -215,7 +215,7 @@ def __init__(self, config: MobileViTConfig, hidden_size: int) -> None:

if hidden_size % config.num_attention_heads != 0:
raise ValueError(
f"The hidden size {hidden_size,} is not a multiple of the number of attention "
f"The hidden size {hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/mobilevit/modeling_tf_mobilevit.py
@@ -262,7 +262,7 @@ def __init__(self, config: MobileViTConfig, hidden_size: int, **kwargs) -> None:

if hidden_size % config.num_attention_heads != 0:
raise ValueError(
f"The hidden size {hidden_size,} is not a multiple of the number of attention "
f"The hidden size {hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

@@ -112,7 +112,7 @@ def __call__(

# ensure we have as much audios as audio tokens
num_audio_tokens = sum(sample.count(self.audio_token) for sample in text)
num_audios = 1 if type(audios) == np.ndarray else len(audios)
num_audios = 1 if isinstance(audios, np.ndarray) else len(audios)
if num_audio_tokens != num_audios:
raise ValueError(
f"Found {num_audio_tokens} {self.audio_token} token{'s' if num_audio_tokens > 1 else ''} in provided text but received {num_audios} audio{'s' if num_audios > 1 else ''}"
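Note on the processor change above: the single-versus-batched distinction is unchanged — a bare np.ndarray counts as one audio clip, any other sequence is counted with len(). A minimal sketch with hypothetical inputs (count_audios is an illustrative helper, not the processor's API):

```python
import numpy as np

def count_audios(audios):
    # A bare array is a single clip; a list of arrays is a batch.
    return 1 if isinstance(audios, np.ndarray) else len(audios)

single = np.zeros(16000)                   # one waveform
batch = [np.zeros(16000), np.zeros(8000)]  # two waveforms

print(count_audios(single))  # 1
print(count_audios(batch))   # 2
```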
2 changes: 1 addition & 1 deletion src/transformers/models/videomae/modeling_videomae.py
@@ -201,7 +201,7 @@ def __init__(self, config: VideoMAEConfig) -> None:
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/vilt/modeling_vilt.py
@@ -322,7 +322,7 @@ def __init__(self, config):
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/vit/modeling_vit.py
@@ -189,7 +189,7 @@ def __init__(self, config: ViTConfig) -> None:
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/vit_mae/modeling_vit_mae.py
@@ -362,7 +362,7 @@ def __init__(self, config: ViTMAEConfig) -> None:
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/vit_msn/modeling_vit_msn.py
@@ -179,7 +179,7 @@ def __init__(self, config: ViTMSNConfig) -> None:
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

@@ -109,7 +109,7 @@ def __init__(self, config: VitPoseBackboneConfig) -> None:
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/vivit/modeling_vivit.py
@@ -172,7 +172,7 @@ def __init__(self, config: VivitConfig) -> None:
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)

2 changes: 1 addition & 1 deletion src/transformers/models/yolos/modeling_yolos.py
@@ -237,7 +237,7 @@ def __init__(self, config: YolosConfig) -> None:
super().__init__()
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
raise ValueError(
f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
f"heads {config.num_attention_heads}."
)
