10 changes: 10 additions & 0 deletions src/transformers/quantizers/quantizer_torchao.py
@@ -247,6 +247,16 @@ def create_quantized_param(
module._parameters[tensor_name] = torch.nn.Parameter(
    param_value, requires_grad=param_value.requires_grad
).to(device=target_device)
# If we are quantizing tied parameters, the correct order to avoid tying the quantized weights is:
# 1. load the weight into the model
# 2. run tie_weights to populate the tied weights
# 3. quantize
input_embed = model.get_input_embeddings()
Member

Another solution, which would be a tad cleaner, would be to not quantize the embeddings here and instead do it in _process_model_after_weight_loading.
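A rough sketch of that alternative (not the PR's implementation; the hook name comes from the existing HfQuantizer API, the body is illustrative and assumes torchao's quantize_ would be applied to the embedding at this stage):

def _process_model_after_weight_loading(self, model, **kwargs):
    # Embeddings were skipped during weight loading, so by now all weights
    # (including any weight tying) are settled and can be quantized safely.
    if self.quantization_config.include_embedding:
        input_embed = model.get_input_embeddings()
        if self.quantization_config.untie_embedding_weights:
            model.tie_weights()  # repopulate lm_head from the loaded embedding
            setattr(model.config.get_text_config(decoder=True), "tie_word_embeddings", False)
        # quantize the now-settled embedding here, e.g. torchao.quantization.quantize_(input_embed, config)
    return model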

Contributor Author

Yeah, we can explore this path. The main constraint here is that we need to save a valid quantization_config together with the quantized model, so people can load the model + quantization_config in other frameworks like lm-eval and vllm.
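For context, a minimal sketch of that round trip (model name and quant type are placeholders, not from this PR): the quantization_config written into config.json at save time is what lm-eval or vllm would read back.

from transformers import AutoModelForCausalLM, TorchAoConfig

quantization_config = TorchAoConfig("int4_weight_only", group_size=128)  # placeholder quant type
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B",  # placeholder model
    torch_dtype="auto",
    quantization_config=quantization_config,
)
# torchao tensor subclasses are not supported by safetensors, hence safe_serialization=False
model.save_pretrained("llama-3.2-1b-int4wo", safe_serialization=False)
# downstream loaders (lm-eval, vllm) rely on the quantization_config saved in config.json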

if self.quantization_config.untie_embedding_weights and id(module) == id(input_embed):
    model.tie_weights()
    setattr(model.config.get_text_config(decoder=True), "tie_word_embeddings", False)

# handle AOPerModuleConfig, introduced in torchao 0.11.0+
if self.quantization_config._get_ao_version() > version.Version("0.10.0"):
    from torchao.quantization import AOPerModuleConfig
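For reference, a hedged sketch of what an AOPerModuleConfig quant_type looks like on the user side (the concrete configs are illustrative, and the "_default" fallback key follows the torchao 0.11 API as an assumption):

from torchao.quantization import AOPerModuleConfig, Int4WeightOnlyConfig, Int8WeightOnlyConfig

quant_type = AOPerModuleConfig({
    "_default": Int4WeightOnlyConfig(group_size=128),  # fallback config for all quantized modules
    "model.embed_tokens": Int8WeightOnlyConfig(),      # per-module override by fully qualified name
})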
6 changes: 6 additions & 0 deletions src/transformers/utils/quantization_config.py
@@ -1555,6 +1555,7 @@ class TorchAoConfig(QuantizationConfigMixin):
modules_to_not_convert: Optional[List]
quant_type_kwargs: Dict[str, Any]
include_embedding: bool
untie_embedding_weights: bool

"""This is a config class for torchao quantization/sparsity techniques.

Expand All @@ -1569,6 +1570,9 @@ class TorchAoConfig(QuantizationConfigMixin):
include_embedding (`bool`, defaults to `False`):
    Whether to include the embedding in quantization or not. If this flag is set, the input embedding
    will also be removed from the modules_to_not_convert list.
untie_embedding_weights (`bool`, defaults to `False`):
    Whether to untie the weights when quantizing input embedding weights that are tied to other weights.
kwargs (`Dict[str, Any]`, *optional*):
    The keyword arguments for the chosen type of quantization; for example, int4_weight_only quantization
    currently supports the two keyword arguments `group_size` and `inner_k_tiles`. More API examples and documentation of arguments can be found in
@@ -1614,13 +1618,15 @@ def __init__(
    quant_type: Union[str, "AOBaseConfig"],  # noqa: F821
    modules_to_not_convert: Optional[List] = None,
    include_embedding: bool = False,
    untie_embedding_weights: bool = False,
    **kwargs,
):
    self.quant_method = QuantizationMethod.TORCHAO
    self.quant_type = quant_type
    self.modules_to_not_convert = modules_to_not_convert
    self.quant_type_kwargs = kwargs.get("quant_type_kwargs", kwargs)
    self.include_embedding = include_embedding
    self.untie_embedding_weights = untie_embedding_weights
Member

Make sure not to save this key when serializing the model config, as it is redundant with tie_word_embeddings.

Contributor Author

It's a bit different, I think. Together with tie_word_embeddings == True, this flag was specifically trying to achieve the order:

  1. load unquantized weight
  2. tie weights
  3. quantize (this will untie the weights)

while tie_word_embeddings == True without the flag means:

  1. load unquantized weight
  2. quantize
  3. tie weights (tied quantized weights)

and tie_word_embeddings == False means:

  1. load unquantized weight
  2. quantize
  3. don't tie weights
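
A hedged usage sketch of that first ordering (model name and quant type are placeholders): with include_embedding=True and untie_embedding_weights=True, the embedding is loaded, tied into lm_head, then quantized, and the config is saved with tie_word_embeddings=False.

from transformers import AutoModelForCausalLM, TorchAoConfig

quantization_config = TorchAoConfig(
    "int8_weight_only",            # placeholder quant type
    include_embedding=True,        # quantize the input embedding as well
    untie_embedding_weights=True,  # load -> tie_weights -> quantize, then mark tie_word_embeddings=False
)
model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceTB/SmolLM2-135M",  # placeholder: a model with tied embeddings
    torch_dtype="auto",
    quantization_config=quantization_config,
)
# after quantization, input and output embeddings should no longer share a parameter
assert model.get_input_embeddings().weight is not model.get_output_embeddings().weight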

    self.post_init()

@staticmethod
Expand Down