Skip to content

Commit 4a2e33b

Browse files
committed
fix: fsdp sharded state dict won't work for save_only_model knob
Signed-off-by: Mehant Kammakomati <[email protected]>
1 parent 071a161 commit 4a2e33b

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

src/transformers/trainer.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5175,6 +5175,12 @@ def create_accelerator_and_postprocess(self):
             raise ValueError(
                 "`auto_find_batch_size` isn't supported yet with DeepSpeed Zero-3. Please consider using Zero-2, Zero-1, or FSDP"
             )
+        if (
+            self.args.save_only_model
+            and self.is_fsdp_enabled
+            and "SHARDED_STATE_DICT" in str(self.accelerator.state.fsdp_plugin.state_dict_type)
+        ):
+            raise ValueError("save_only_model option is not compatible with FSDP state dict type 'SHARDED_STATE_DICT'")

     def propagate_args_to_deepspeed(self, auto_find_batch_size=False):
         """

0 commit comments

Comments
 (0)