Skip to content

Commit 4d4ad83

Browse files
committed
address comment
Signed-off-by: Siyuan Fu <siyuanf@nvidia.com>
1 parent 8c28758 commit 4d4ad83

1 file changed

Lines changed: 4 additions & 4 deletions

File tree

benchmarks/bench_trtllm_gen_fused_moe_autotuner.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -42,12 +42,12 @@ def mxint4_quantize(
4242
scales = amax / 8.0
4343
x_scaled = x_reshaped * scales.reciprocal()
4444
x_int8 = (
45-
x_scaled.round().clamp(-8, 7).to(torch.uint8).reshape(-1, sf_vec_size // 2, 2)
45+
x_scaled.round().clamp(-8, 7).to(torch.int8).reshape(-1, sf_vec_size // 2, 2)
4646
)
4747
x_int4 = (x_int8[..., 0] & 0x0F) | ((x_int8[..., 1] & 0x0F) << 4)
48-
return x_int4.reshape(*x.shape[:-1], x.shape[-1] // 2), scales.reshape(
49-
-1, sf_vec_size
50-
)
48+
return x_int4.reshape(*x.shape[:-1], x.shape[-1] // 2).view(
49+
torch.uint8
50+
), scales.reshape(-1, sf_vec_size)
5151

5252

5353
def bench_trtllm_gen_fused_moe_autotuner_fp8(

0 commit comments

Comments
 (0)