@@ -568,14 +568,16 @@ def _int8_asymm_per_token_quant(x: torch.Tensor) -> torch.Tensor:
     """This is defined here instead of local function to support serialization"""
     mapping_type = MappingType.ASYMMETRIC
     target_dtype = torch.int8
+    scale_dtype = torch.float32
+    zero_point_dtype = torch.int32
     if TORCH_VERSION_AT_LEAST_2_6:
         return to_affine_quantized_intx(
             x,
             mapping_type,
             _get_per_token_block_size(x),
             target_dtype,
-            scale_dtype=torch.float64,
-            zero_point_dtype=torch.int64,
+            scale_dtype=scale_dtype,
+            zero_point_dtype=zero_point_dtype,
         )
     else:
         return to_affine_quantized_intx(
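For reference, here is a minimal standalone sketch of what asymmetric per-token int8 quantization with the newly pinned dtypes (float32 scales, int32 zero points) computes. The helper name and the eps clamp are illustrative assumptions, not torchao's actual implementation:

```python
import torch


def int8_asymm_per_token_quant_sketch(x: torch.Tensor):
    """Hypothetical standalone sketch; NOT torchao's implementation."""
    qmin, qmax = -128, 127
    # One (min, max) pair per token: reduce over the last dimension.
    min_val = x.amin(dim=-1, keepdim=True)
    max_val = x.amax(dim=-1, keepdim=True)
    # Include zero in the range so exact zeros stay exactly representable.
    min_val = torch.minimum(min_val, torch.zeros_like(min_val))
    max_val = torch.maximum(max_val, torch.zeros_like(max_val))
    # float32 scale and int32 zero point, matching the dtypes this PR pins.
    scale = ((max_val - min_val) / (qmax - qmin)).to(torch.float32)
    scale = scale.clamp(min=torch.finfo(torch.float32).eps)  # avoid zero scale
    zero_point = (qmin - torch.round(min_val / scale)).to(torch.int32)
    q = torch.clamp(torch.round(x / scale) + zero_point, qmin, qmax).to(torch.int8)
    return q, scale, zero_point
```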
@@ -649,7 +651,6 @@ def _int8_dynamic_activation_int4_weight_transform(
     # weight settings
     block_size = (1, group_size)
     target_dtype = torch.int8
-    eps = torch.finfo(torch.float32).eps
     quant_min = -8
     quant_max = 7

@@ -680,7 +681,6 @@ def _int8_dynamic_activation_int4_weight_transform(
         target_dtype,
         quant_min,
         quant_max,
-        eps,
         _layout=layout,
     )
     weight = to_linear_activation_quantized(weight, input_quant_func)
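The hunks above stop passing an explicit `eps=torch.finfo(torch.float32).eps`, deferring to the callee's default instead. Below is an illustrative sketch (not torchao's `choose_qparams` code) of the role eps plays when picking symmetric scales; the fallback to a dtype-derived default eps is an assumption about the callee's behavior:

```python
import torch


def symmetric_scale_with_eps_sketch(w: torch.Tensor, quant_min=-8, quant_max=7):
    # Why an eps floor exists: an all-zero weight block has max_abs == 0,
    # which would make the scale 0 and the quantization step (x / scale)
    # divide by zero.
    max_abs = w.abs().amax()
    scale = max_abs / max(abs(quant_min), quant_max)
    # With the explicit argument removed, the assumption is that a default
    # eps derived from the working dtype takes over, as sketched here.
    eps = torch.finfo(scale.dtype).eps
    return torch.clamp(scale, min=eps)
```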
@@ -793,7 +793,6 @@ def _int8_dynamic_activation_intx_weight_transform(
         target_dtype=torch.int8,
         quant_min=quant_min,
         quant_max=quant_max,
-        eps=torch.finfo(torch.float32).eps,
         scale_dtype=weight_scale_dtype,
         zero_point_dtype=torch.int8,
         preserve_zero=(weight_mapping_type == MappingType.SYMMETRIC),
@@ -1830,7 +1829,6 @@ def _intx_weight_only_transform(
         target_dtype=torch.int8,
         quant_min=quant_min,
         quant_max=quant_max,
-        eps=torch.finfo(torch.float32).eps,
         scale_dtype=scale_dtype,
         zero_point_dtype=torch.int8,
         preserve_zero=(mapping_type == MappingType.SYMMETRIC),
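For context on where these transforms sit, a hedged usage example follows. The `quantize_` entry point and the `Int8DynamicActivationInt4WeightConfig` config name are taken from recent torchao releases and are assumptions relative to this diff; verify them against your installed version:

```python
import torch

# Assumed public API names; check your torchao version before relying on them.
from torchao.quantization import quantize_, Int8DynamicActivationInt4WeightConfig

model = torch.nn.Sequential(torch.nn.Linear(128, 256))
# Routes through _int8_dynamic_activation_int4_weight_transform: dynamic
# per-token int8 activation quantization plus group-wise int4 weights.
quantize_(model, Int8DynamicActivationInt4WeightConfig(group_size=32))
```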