up

metascroy · metascroy · commit f5cb0cefbe56 · 2025-04-23T11:18:47.000-07:00
diff --git a/torchao/quantization/qat/embedding.py b/torchao/quantization/qat/embedding.py
@@ -235,7 +235,10 @@ def _convert_helper(self, module: torch.nn.Module):
                 # Load weights and qparams into quantized embedding
                 (qmin, qmax) = _get_qmin_qmax(self.bit_width)
                 (s, zp) = get_group_qparams_symmetric(
-                    child.weight, self.bit_width, group_size, precision=scale_precision,
+                    child.weight,
+                    self.bit_width,
+                    group_size,
+                    precision=scale_precision,
                 )
                 q_weight = _quantized_decomposed_quantize_per_channel_group_wrapper(
                     child.weight,
diff --git a/torchao/quantization/qat/linear.py b/torchao/quantization/qat/linear.py
@@ -219,7 +219,10 @@ def _convert_qat_linear_8da4w(self, module: torch.nn.Module):
                 n_bit = 4
                 (qmin, qmax) = _get_qmin_qmax(n_bit)
                 (s, zp) = get_group_qparams_symmetric(
-                    child.weight, n_bit, config.group_size, precision=scale_precision,
+                    child.weight,
+                    n_bit,
+                    config.group_size,
+                    precision=config.scale_precision,
                 )
                 from torchao._executorch_ops import (
                     _quantized_decomposed_quantize_per_channel_group_wrapper,