2 changes: 1 addition & 1 deletion src/transformers/data/datasets/glue.py
@@ -122,7 +122,7 @@ def __init__(
         with FileLock(lock_path):
             if os.path.exists(cached_features_file) and not args.overwrite_cache:
                 start = time.time()
-                self.features = torch.load(cached_features_file)
+                self.features = torch.load(cached_features_file, weights_only=True)
                 logger.info(
                     f"Loading features from cached file {cached_features_file} [took %.3f s]", time.time() - start
                 )
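For context on the change repeated throughout this PR: `weights_only=True` switches `torch.load` to a restricted unpickler that only reconstructs tensors, primitive containers, and a small allowlist of types, so a tampered checkpoint raises an error instead of executing arbitrary pickle code. A minimal sketch of the behavior (file name hypothetical):

```python
import torch

# A plain state dict of tensors round-trips fine under the restriction.
torch.save({"weight": torch.randn(2, 2)}, "ckpt.pt")
state = torch.load("ckpt.pt", weights_only=True)
print(state["weight"].shape)  # torch.Size([2, 2])

# A checkpoint containing arbitrary Python objects would instead raise an
# UnpicklingError rather than silently running attacker-controlled code.
```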
2 changes: 1 addition & 1 deletion src/transformers/models/bark/convert_suno_to_hf.py
@@ -109,7 +109,7 @@ def _load_model(ckpt_path, device, use_small=False, model_type="text"):
     if not os.path.exists(ckpt_path):
         logger.info(f"{model_type} model not found, downloading into `{CACHE_DIR}`.")
         _download(model_info["repo_id"], model_info["file_name"])
-    checkpoint = torch.load(ckpt_path, map_location=device)
+    checkpoint = torch.load(ckpt_path, map_location=device, weights_only=True)
     # this is a hack
     model_args = checkpoint["model_args"]
     if "input_vocab_size" not in model_args:
@@ -71,7 +71,7 @@ def rename_key(dct, old, new):
 
 def load_xsum_checkpoint(checkpoint_path):
     """Checkpoint path should end in model.pt"""
-    sd = torch.load(checkpoint_path, map_location="cpu")
+    sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
     hub_interface = torch.hub.load("pytorch/fairseq", "bart.large.cnn").eval()
     hub_interface.model.load_state_dict(sd["model"])
     return hub_interface
@@ -101,7 +101,7 @@ def main(raw_args=None):
 
     model = BertModel.from_pretrained(
         pretrained_model_name_or_path=args.model_name,
-        state_dict=torch.load(args.pytorch_model_path),
+        state_dict=torch.load(args.pytorch_model_path, weights_only=True),
         cache_dir=args.cache_dir,
     )
 
@@ -168,7 +168,7 @@ def convert_biogpt_checkpoint_to_pytorch(biogpt_checkpoint_path, pytorch_dump_fo
     checkpoint_file = os.path.join(biogpt_checkpoint_path, "checkpoint.pt")
     if not os.path.isfile(checkpoint_file):
         raise ValueError(f"path to the file {checkpoint_file} does not exist!")
-    chkpt = torch.load(checkpoint_file, map_location="cpu")
+    chkpt = torch.load(checkpoint_file, map_location="cpu", weights_only=True)
 
     args = chkpt["cfg"]["model"]
 
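One caveat for conversion scripts like this one: the checkpoint here also carries a config object (`chkpt["cfg"]`), and non-tensor classes are rejected by the restricted unpickler unless they are explicitly allowlisted. A hedged sketch of the opt-in, assuming PyTorch >= 2.4 and, purely for illustration, that the config is an `argparse.Namespace` (the real class depends on the fairseq version):

```python
import argparse
import torch

# Allowlist specific trusted classes before loading with weights_only=True.
torch.serialization.add_safe_globals([argparse.Namespace])
chkpt = torch.load("checkpoint.pt", map_location="cpu", weights_only=True)
```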
@@ -79,7 +79,7 @@ def convert_parlai_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_
     """
     Copy/paste/tweak model's weights to our BERT structure.
     """
-    model = torch.load(checkpoint_path, map_location="cpu")
+    model = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
     sd = model["model"]
     cfg = BlenderbotConfig.from_json_file(config_json_path)
     m = BlenderbotForConditionalGeneration(cfg)
@@ -104,7 +104,7 @@ def convert_bloom_checkpoint_to_pytorch(
         for i in range(pretraining_tp):
             # load all TP files
             f_name = file.replace("model_00", f"model_0{i}")
-            temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu")
+            temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu", weights_only=True)
 
             # Rename keys in the transformers names
             keys = list(temp.keys())
@@ -164,7 +164,7 @@ def convert_bloom_checkpoint_to_pytorch(
         for i in range(pretraining_tp):
             # load all TP files
             f_name = file.replace("model_00", f"model_0{i}")
-            temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu")
+            temp = torch.load(os.path.join(bloom_checkpoint_path, f_name), map_location="cpu", weights_only=True)
 
             # Rename keys in the transformers names
             keys = list(temp.keys())
@@ -130,13 +130,15 @@ def write_model(model_path, input_base_path, model_size, chameleon_version=1):
         for possible_name in ["consolidated.pth", "consolidated.00.pth"]:
             possible_path = os.path.join(input_model_path, possible_name)
             if os.path.exists(possible_path):
-                loaded = torch.load(possible_path, map_location="cpu")
+                loaded = torch.load(possible_path, map_location="cpu", weights_only=True)
                 break
         assert loaded is not None
     else:
         # Sharded
         loaded = [
-            torch.load(os.path.join(input_model_path, f"consolidated.{i:02d}.pth"), map_location="cpu")
+            torch.load(
+                os.path.join(input_model_path, f"consolidated.{i:02d}.pth"), map_location="cpu", weights_only=True
+            )
             for i in range(num_shards)
         ]
 
@@ -314,7 +316,7 @@ def permute(w, n_heads, dim1=dim, dim2=dim):
 
     # Load VQGAN weights
     vqgan_path = os.path.join(input_base_path, "tokenizer/vqgan.ckpt")
-    vqgan_state_dict = torch.load(vqgan_path, map_location="cpu")["state_dict"]
+    vqgan_state_dict = torch.load(vqgan_path, map_location="cpu", weights_only=True)["state_dict"]
     for k, v in vqgan_state_dict.items():
         if "decoder" in k:
             continue  # we dont do image generation yet
@@ -104,7 +104,7 @@ def convert_chinese_clip_checkpoint(checkpoint_path, pytorch_dump_folder_path, c
 
     hf_model = ChineseCLIPModel(config).eval()
 
-    pt_weights = torch.load(checkpoint_path, map_location="cpu")["state_dict"]
+    pt_weights = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["state_dict"]
     pt_weights = {(name[7:] if name.startswith("module.") else name): value for name, value in pt_weights.items()}
 
     copy_text_model_and_projection(hf_model, pt_weights)
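Aside on the comprehension just after the load: it strips the `module.` prefix that `torch.nn.DataParallel` prepends to every parameter name when a wrapped model is saved. The same cleanup as a standalone helper (a sketch; `removeprefix` needs Python 3.9+):

```python
def strip_module_prefix(state_dict: dict) -> dict:
    # DataParallel saves parameters as "module.<name>"; drop the wrapper
    # prefix so keys match the unwrapped model.
    return {k.removeprefix("module."): v for k, v in state_dict.items()}
```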
@@ -169,7 +169,7 @@ def convert_clipseg_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_
     model = CLIPSegForImageSegmentation(config)
     model.eval()
 
-    state_dict = torch.load(checkpoint_path, map_location="cpu")
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
 
     # remove some keys
     for key in state_dict.copy().keys():
4 changes: 2 additions & 2 deletions src/transformers/models/clvp/convert_clvp_to_hf.py
@@ -201,9 +201,9 @@ def convert_clvp_weights(checkpoint_path, pytorch_dump_folder_path):
         _download(url=each_model_url, root=each_model_path)
 
         if each_model_name == "clvp":
-            clvp_checkpoint = torch.load(each_model_path, map_location="cpu")
+            clvp_checkpoint = torch.load(each_model_path, map_location="cpu", weights_only=True)
         else:
-            decoder_checkpoint = torch.load(each_model_path, map_location="cpu")
+            decoder_checkpoint = torch.load(each_model_path, map_location="cpu", weights_only=True)
 
     # Converting the weights
     converted_checkpoint.update(**convert_encoder_weights(clvp_checkpoint))
@@ -309,7 +309,7 @@ def convert_cvt_checkpoint(cvt_model, image_size, cvt_file_name, pytorch_dump_fo
     model = CvtForImageClassification(config)
     image_processor = AutoImageProcessor.from_pretrained("facebook/convnext-base-224-22k-1k")
     image_processor.size["shortest_edge"] = image_size
-    original_weights = torch.load(cvt_file_name, map_location=torch.device("cpu"))
+    original_weights = torch.load(cvt_file_name, map_location=torch.device("cpu"), weights_only=True)
 
     huggingface_weights = OrderedDict()
     list_of_state_dict = []
@@ -143,7 +143,7 @@ def write_model(model_name, pretrained_model_weights_path, pytorch_dump_folder_p
     config.id2label = id2label
     config.label2id = {v: k for k, v in id2label.items()}
     # load original model from local path
-    loaded = torch.load(pretrained_model_weights_path, map_location=torch.device("cpu"))["model"]
+    loaded = torch.load(pretrained_model_weights_path, map_location=torch.device("cpu"), weights_only=True)["model"]
     # Renaming the original model state dictionary to HF compatibile
     all_keys = list(loaded.keys())
     new_keys = convert_old_keys_to_new_keys(all_keys)
2 changes: 1 addition & 1 deletion src/transformers/models/dac/convert_dac_checkpoint.py
@@ -205,7 +205,7 @@ def convert_checkpoint(
     sample_rate=16000,
     repo_id=None,
 ):
-    model_dict = torch.load(checkpoint_path, "cpu")
+    model_dict = torch.load(checkpoint_path, "cpu", weights_only=True)
 
     config = DacConfig()
 
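Note on the call above: `map_location` is the second positional parameter of `torch.load`, so passing `"cpu"` positionally is equivalent to the keyword form used elsewhere in this PR:

```python
import torch

# Equivalent calls; "cpu" binds to map_location. (Path is hypothetical.)
a = torch.load("weights.pt", "cpu", weights_only=True)
b = torch.load("weights.pt", map_location="cpu", weights_only=True)
```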
@@ -224,7 +224,7 @@ def load(module, prefix=""):
     )
     patch_size = model.patch_embed.patch_size
     args.window_size = (args.input_size // patch_size[0], args.input_size // patch_size[1])
-    checkpoint = torch.load(args.beit_checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.beit_checkpoint, map_location="cpu", weights_only=True)
 
     print(f"Load ckpt from {args.beit_checkpoint}")
     checkpoint_model = None
@@ -125,7 +125,7 @@ def convert_deformable_detr_checkpoint(
     logger.info("Converting model...")
 
     # load original state dict
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
     # rename keys
     for key in state_dict.copy().keys():
         val = state_dict.pop(key)
@@ -229,7 +229,7 @@ def convert_deta_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub):
     else:
         raise ValueError(f"Model name {model_name} not supported")
     checkpoint_path = hf_hub_download(repo_id="nielsr/deta-checkpoints", filename=filename)
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
 
     # rename keys
     rename_keys = create_rename_keys(config)
@@ -230,7 +230,7 @@ def convert_deta_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub):
     else:
         raise ValueError(f"Model name {model_name} not supported")
 
-    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
 
     # original state dict
     for name, param in state_dict.items():
@@ -123,7 +123,7 @@ def prepare_img():
 def convert_efficientformer_checkpoint(
     checkpoint_path: Path, efficientformer_config_file: Path, pytorch_dump_path: Path, push_to_hub: bool
 ):
-    orig_state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+    orig_state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
     config = EfficientFormerConfig.from_json_file(efficientformer_config_file)
     model = EfficientFormerForImageClassificationWithTeacher(config)
     model_name = "_".join(checkpoint_path.split("/")[-1].split(".")[0].split("_")[:-1])
@@ -228,7 +228,7 @@ def convert_openai_checkpoint(model_name=None, pytorch_dump_folder_path=None):
     weight_dict = []
     mapping = {}
     for i, dict_name in enumerate(model_to_convert):
-        old_dic = torch.load(f"{pytorch_dump_folder_path}/{dict_name.split('/')[-1]}")["model"]
+        old_dic = torch.load(f"{pytorch_dump_folder_path}/{dict_name.split('/')[-1]}", weights_only=True)["model"]
 
         new_dic = {}
         for k in old_dic.keys():
@@ -132,13 +132,17 @@ def convert_checkpoint_to_huggingface(pretrained_checkpoint_path, output_path, i
     print(
         "Original Mega encoder:",
         original_mlm.mega.load_state_dict(
-            torch.load(os.path.join(pretrained_checkpoint_path, "encoder_weights.pt"), map_location="cpu")
+            torch.load(
+                os.path.join(pretrained_checkpoint_path, "encoder_weights.pt"), map_location="cpu", weights_only=True
+            )
         ),
     )
     print(
         "Original Mega MLM layer:",
         original_mlm.mlm_head.load_state_dict(
-            torch.load(os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu")
+            torch.load(
+                os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu", weights_only=True
+            )
         ),
     )
 
@@ -234,7 +238,9 @@ def convert_checkpoint_to_huggingface(pretrained_checkpoint_path, output_path, i
     print(
         "HF Mega MLM layer:",
         hf_mlm.mlm_head.load_state_dict(
-            torch.load(os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu")
+            torch.load(
+                os.path.join(pretrained_checkpoint_path, "mlm_head_weights.pt"), map_location="cpu", weights_only=True
+            )
         ),
     )
 
@@ -129,7 +129,7 @@ def convert_weight_and_push(
     print(f"Downloading weights for {name}...")
     checkpoint_path = cached_download(checkpoint)
     print(f"Converting {name}...")
-    from_state_dict = torch.load(checkpoint_path)["state_dict"]
+    from_state_dict = torch.load(checkpoint_path, weights_only=True)["state_dict"]
     from_model.load_state_dict(from_state_dict)
     from_model.eval()
     with torch.no_grad():
@@ -229,7 +229,7 @@ def convert_dpt_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub, ve
         filename=f"{filename}",
     )
 
-    state_dict = torch.load(filepath, map_location="cpu")
+    state_dict = torch.load(filepath, map_location="cpu", weights_only=True)
     # rename keys
     rename_keys = create_rename_keys(config)
     for src, dest in rename_keys:
@@ -27,7 +27,7 @@
 
 
 def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folder_path: str):
-    d = torch.load(checkpoint_path)
+    d = torch.load(checkpoint_path, weights_only=True)
     d[NEW_KEY] = d.pop(OLD_KEY)
     os.makedirs(pytorch_dump_folder_path, exist_ok=True)
     torch.save(d, os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME))
@@ -29,7 +29,9 @@
 
 def load_states_from_checkpoint(model_file: str) -> CheckpointState:
     print(f"Reading saved model from {model_file}")
-    state_dict = torch.load(model_file, map_location=lambda s, l: default_restore_location(s, "cpu"))
+    state_dict = torch.load(
+        model_file, map_location=lambda s, l: default_restore_location(s, "cpu"), weights_only=True
+    )
     return CheckpointState(**state_dict)
 
 
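`map_location` also accepts a callable taking `(storage, location)`; the fairseq-style lambda above routes every storage through `default_restore_location` to pin it to CPU. An equivalent minimal sketch without the fairseq helper (path hypothetical):

```python
import torch

# Returning the storage unchanged keeps each tensor on CPU, the same net
# effect as default_restore_location(storage, "cpu").
state_dict = torch.load("model.pt", map_location=lambda storage, loc: storage, weights_only=True)
```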
@@ -226,7 +226,7 @@ def convert_dpt_checkpoint(checkpoint_url, pytorch_dump_folder_path, push_to_hub
     config, expected_shape = get_dpt_config(checkpoint_url)
     # load original state_dict from URL
     # state_dict = torch.hub.load_state_dict_from_url(checkpoint_url, map_location="cpu")
-    state_dict = torch.load(checkpoint_url, map_location="cpu")
+    state_dict = torch.load(checkpoint_url, map_location="cpu", weights_only=True)
     # remove certain keys
     remove_ignore_keys_(state_dict)
     # rename keys
@@ -325,7 +325,7 @@ def convert_checkpoint(
     )
     feature_extractor.save_pretrained(pytorch_dump_folder_path)
 
-    original_checkpoint = torch.load(checkpoint_path)
+    original_checkpoint = torch.load(checkpoint_path, weights_only=True)
     if "best_state" in original_checkpoint:
         # we might have a training state saved, in which case discard the yaml results and just retain the weights
         original_checkpoint = original_checkpoint["best_state"]
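The `best_state` check above is an instance of the usual unwrap-then-load pattern: training checkpoints often nest the weights beside optimizer state and metadata. A short sketch (key and path hypothetical):

```python
import torch

ckpt = torch.load("checkpoint.pt", weights_only=True)
# Use the nested weights if this is a full training checkpoint; otherwise
# treat the whole dict as the state dict.
state_dict = ckpt.get("best_state", ckpt)
```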
@@ -164,7 +164,7 @@ def convert_FastSpeech2ConformerModel_checkpoint(
     # Prepare the model
     model = FastSpeech2ConformerModel(config)
 
-    espnet_checkpoint = torch.load(checkpoint_path)
+    espnet_checkpoint = torch.load(checkpoint_path, weights_only=True)
     hf_compatible_state_dict = convert_espnet_state_dict_to_hf(espnet_checkpoint)
 
     model.load_state_dict(hf_compatible_state_dict)
@@ -104,7 +104,7 @@ def convert_hifigan_checkpoint(
 
     model = FastSpeech2ConformerHifiGan(config)
 
-    orig_checkpoint = torch.load(checkpoint_path)
+    orig_checkpoint = torch.load(checkpoint_path, weights_only=True)
     load_weights(orig_checkpoint, model, config)
 
     model.save_pretrained(pytorch_dump_folder_path)
@@ -51,7 +51,7 @@ def convert_FastSpeech2ConformerWithHifiGan_checkpoint(
 
     model = FastSpeech2ConformerModel(model_config)
 
-    espnet_checkpoint = torch.load(checkpoint_path)
+    espnet_checkpoint = torch.load(checkpoint_path, weights_only=True)
     hf_compatible_state_dict = convert_espnet_state_dict_to_hf(espnet_checkpoint)
     model.load_state_dict(hf_compatible_state_dict)
 
@@ -62,7 +62,7 @@ def convert_dalle_checkpoint(checkpoint_path, pytorch_dump_folder_path, config_p
 
     encoder = Encoder()
     if os.path.exists(checkpoint_path):
-        ckpt = torch.load(checkpoint_path)
+        ckpt = torch.load(checkpoint_path, weights_only=True)
     else:
         ckpt = torch.hub.load_state_dict_from_url(checkpoint_path)
 
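The `torch.hub` fallback branch is left unrestricted here; recent PyTorch releases accept the same flag on `load_state_dict_from_url`, which forwards to `torch.load` internally. A hedged sketch (URL hypothetical, and assuming a PyTorch version where the kwarg exists):

```python
from torch.hub import load_state_dict_from_url

ckpt = load_state_dict_from_url(
    "https://example.com/encoder.pt", map_location="cpu", weights_only=True
)
```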
@@ -73,7 +73,7 @@ def convert_flava_checkpoint(checkpoint_path, codebook_path, pytorch_dump_folder
     codebook_state_dict = convert_dalle_checkpoint(codebook_path, None, save_checkpoint=False)
 
     if os.path.exists(checkpoint_path):
-        state_dict = torch.load(checkpoint_path, map_location="cpu")
+        state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
     else:
         state_dict = torch.hub.load_state_dict_from_url(checkpoint_path, map_location="cpu")
 
@@ -87,7 +87,7 @@ def rename_state_dict(state_dict):
 
 def convert_fuyu_checkpoint(pytorch_dump_folder_path, ada_lib_path, pt_model_path, safe_serialization=False):
     sys.path.insert(0, ada_lib_path)
-    model_state_dict_base = torch.load(pt_model_path, map_location="cpu")
+    model_state_dict_base = torch.load(pt_model_path, map_location="cpu", weights_only=True)
     state_dict = flatdict.FlatDict(model_state_dict_base["model"], ".")
     state_dict = rename_state_dict(state_dict)
 
@@ -72,7 +72,7 @@ def write_model(save_path, input_base_path, config, safe_serialization=True, pus
     head_dim = config.head_dim
 
     print(f"Fetching all parameters from the checkpoint at '{input_base_path}'")
-    model_state_dict = torch.load(input_base_path, map_location="cpu")["model_state_dict"]
+    model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True)["model_state_dict"]
     model_state_dict.pop("freqs_cis")
 
     state_dict = {}
@@ -97,11 +97,11 @@ def write_model(save_path, input_base_path, config, safe_serialization=True, pus
 
         for file in files:
             print(file)
-            loaded_state_dict = torch.load(os.path.join(input_base_path, file), map_location="cpu")
+            loaded_state_dict = torch.load(os.path.join(input_base_path, file), map_location="cpu", weights_only=True)
             model_state_dict.update(loaded_state_dict)
     else:
         print("Model does not seem to be sharded")
-        model_state_dict = torch.load(input_base_path, map_location="cpu")["model_state_dict"]
+        model_state_dict = torch.load(input_base_path, map_location="cpu", weights_only=True)["model_state_dict"]
         model_state_dict.pop("freqs_cis")
 
     state_dict = {}
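Both sharded branches in this PR follow the same pattern: load each shard with the restriction on, then merge the partial dicts. A self-contained sketch (directory layout and helper name hypothetical):

```python
import os
import torch

def load_sharded_state_dict(ckpt_dir: str) -> dict:
    # Merge every shard into one flat state dict; later shards overwrite
    # duplicate keys, matching the update() loops in the conversion scripts.
    merged = {}
    for fname in sorted(os.listdir(ckpt_dir)):
        if fname.endswith(".pt"):
            shard = torch.load(os.path.join(ckpt_dir, fname), map_location="cpu", weights_only=True)
            merged.update(shard)
    return merged
```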
2 changes: 1 addition & 1 deletion src/transformers/models/git/convert_git_to_pytorch.py
@@ -297,7 +297,7 @@ def convert_git_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub=Fal
     if "large" in model_name and not is_video and "large-r" not in model_name:
         # large checkpoints take way too long to download
         checkpoint_path = model_name_to_path[model_name]
-        state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
+        state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"]
     else:
         checkpoint_url = model_name_to_url[model_name]
         state_dict = torch.hub.load_state_dict_from_url(checkpoint_url, map_location="cpu", file_name=model_name)[
2 changes: 1 addition & 1 deletion src/transformers/models/glm/convert_glm_weights_to_hf.py
@@ -53,7 +53,7 @@ def load_weights(input_dir: str):
     elif bin_files:
         bin_files = sorted(bin_files, key=lambda x: int(x.rsplit("-", 3)[1]))
         for file in bin_files:
-            tensors = torch.load(file, map_location="cpu")
+            tensors = torch.load(file, map_location="cpu", weights_only=True)
             all_weights.update(tensors)
     return all_weights
 