1 parent f60594f commit e7b6f14
torchchat/utils/gguf_loader.py
@@ -237,7 +237,7 @@ def _prepare_weight_and_scales_and_zeros(
     weight_int32, scales_and_zeros = group_quantize_tensor(
         weight_bf16, n_bit=4, groupsize=groupsize
     )
-    if is_device(weight_int32.device.type, "cpu") and TORCH_VERSION_AT_LEAST_2_6:
+    if is_device(weight_int32.device.type, "cpu"):
         weight_int4pack = torch.ops.aten._convert_weight_to_int4pack_for_cpu(
             weight_int32, inner_k_tiles
         )
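
For context, the change removes the `TORCH_VERSION_AT_LEAST_2_6` guard, so the CPU-specific int4 packing op is invoked whenever the quantized weight lives on the CPU. Below is a minimal sketch of that packing step, assuming a PyTorch build that exposes `aten._convert_weight_to_int4pack_for_cpu` (2.6 or newer); the helper name `pack_int4_weight_cpu` is hypothetical and not part of `gguf_loader.py`:

```python
import torch

def pack_int4_weight_cpu(weight_int32: torch.Tensor, inner_k_tiles: int) -> torch.Tensor:
    # Hypothetical helper: packs an int32 group-quantized weight into the
    # CPU int4 packed layout using the same op called in the diff above.
    # Assumes PyTorch >= 2.6, where this op is available for CPU tensors.
    assert weight_int32.device.type == "cpu", "this packing path is CPU-only"
    return torch.ops.aten._convert_weight_to_int4pack_for_cpu(
        weight_int32, inner_k_tiles
    )
```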