Skip to content

Commit 646f8a0

Browse files
committed
Revert "32x320 blockscale fp8 kernel"
This reverts commit 0eb4e16.
1 parent 0eb4e16 commit 646f8a0

File tree

7 files changed: +4 −11 lines changed

aiter/fused_moe.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -457,7 +457,7 @@ def FinalFunc():
457457
in fused_moe_1stage_dict[get_gfx()]
458458
):
459459
if q_type == QuantType.per_1x128:
460-
run_1stage = True and (inter_dim % 256 == 0 or inter_dim % 320 == 0) and (token > 31)
460+
run_1stage = True and (inter_dim % 256 == 0) and (token > 31)
461461
elif q_type == QuantType.per_Token and q_dtype_w in [dtypes.i8, dtypes.fp8]:
462462
run_1stage = token > 32
463463
else:
@@ -483,8 +483,6 @@ def FinalFunc():
483483
f"[fused_moe] using {'1stage' if run_1stage else '2stage'} {'default' if cfg is None else tag} for {keys} "
484484
)
485485

486-
print('MyTag: ',tag)
487-
488486
if (
489487
"ck" in kernelName1
490488
or q_dtype_w
@@ -497,8 +495,7 @@ def FinalFunc():
497495
or (q_dtype_w == dtypes.fp8 and q_type == QuantType.per_1x128)
498496
or (q_type == QuantType.per_1x128 and block_m == 16)
499497
):
500-
if inter_dim % 320 != 0:
501-
return MOEMetadata(
498+
return MOEMetadata(
502499
functools.partial(
503500
aiter.ck_moe_stage1_fwd,
504501
kernelName=kernelName1,
@@ -514,15 +511,14 @@ def FinalFunc():
514511
block_m,
515512
ksplit,
516513
run_1stage,
517-
)
514+
)
518515

519516
# TODO: remove when stage2 support more size
520517
tmpList = [32, 64, 128]
521518
if block_m not in tmpList:
522519
tag = ""
523520
block_m = ([el for el in tmpList if block_m < el] + [128])[0]
524521

525-
print(run_1stage)
526522
return MOEMetadata(
527523
functools.partial(
528524
asm_stage1,

aiter/ops/moe_op.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -419,7 +419,6 @@ def ck_moe_stage2_fwd(
419419
mul_routed_weight_stage,
420420
)
421421

422-
print(kernelName)
423422
ck_moe_stage2(
424423
inter_states,
425424
w1,

csrc/py_itfs_cu/asm_fmoe.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -822,7 +822,7 @@ void fmoe_fp8_blockscale_g1u1(torch::Tensor& out, // [token_cnt, d
822822
int sub_X_cnt = sorted_expert_ids.size(0);
823823
const char* enable_vskip = std::getenv("AITER_ENABLE_VSKIP");
824824

825-
if(out.dtype() == at::ScalarType::BFloat16 && (inter_dim % 256 == 0 || inter_dim % 320 == 0) && fc_scale_blkn == 128 &&
825+
if(out.dtype() == at::ScalarType::BFloat16 && inter_dim % 256 == 0 && fc_scale_blkn == 128 &&
826826
fc_scale_blkk == 128)
827827
{
828828
if(activation == ActivationType::Silu)

hsa/gfx942/fmoe/gelu/fmoe_bf16_blockscaleFp8_g1u1_gelu.csv

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,4 +3,3 @@ _ZN5aiter47fmoe_bf16_blockscaleFp8_g1u1_vs_gelu_1tg_32x256E,fmoe_bf16_blockscale
33
_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_gelu_1tg_ps_32x256E,fmoe_bf16_blockscaleFp8_g1u1_vs_gelu_1tg_ps_32x256.co,0,1,0,1,1,32,256
44
_ZN5aiter49fmoe_bf16_blockscaleFp8_g1u1_novs_gelu_1tg_32x256E,fmoe_bf16_blockscaleFp8_g1u1_novs_gelu_1tg_32x256.co,0,0,0,1,0,32,256
55
_ZN5aiter52fmoe_bf16_blockscaleFp8_g1u1_novs_gelu_1tg_ps_32x256E,fmoe_bf16_blockscaleFp8_g1u1_novs_gelu_1tg_ps_32x256.co,0,0,0,1,1,32,256
6-
_ZN5aiter40fmoe_bf16_blockscaleFp8_g1u1_gelu_32x320E,fmoe_bf16_blockscaleFp8_g1u1_gelu_32x320.co,0,0,0,1,0,32,320
Binary file not shown.

hsa/gfx942/fmoe/silu/fmoe_bf16_blockscaleFp8_g1u1_silu.csv

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,4 +3,3 @@ _ZN5aiter47fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_32x256E,fmoe_bf16_blockscale
33
_ZN5aiter52fmoe_bf16_blockscaleFp8_g1u1_novs_silu_1tg_ps_32x256E,fmoe_bf16_blockscaleFp8_g1u1_novs_silu_1tg_ps_32x256.co,0,0,0,1,1,32,256
44
_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256.co,0,1,0,1,1,32,256
55
_ZN5aiter49fmoe_bf16_blockscaleFp8_g1u1_novs_silu_1tg_32x256E,fmoe_bf16_blockscaleFp8_g1u1_novs_silu_1tg_32x256.co,0,0,0,1,0,32,256
6-
_ZN5aiter40fmoe_bf16_blockscaleFp8_g1u1_silu_32x320E,fmoe_bf16_blockscaleFp8_g1u1_silu_32x320.co,0,0,0,1,0,32,320
-73.5 KB
Binary file not shown.

0 commit comments

Comments
 (0)