1 file changed, 12 insertions(+), 0 deletions(-)

@@ -1427,6 +1427,18 @@ def __init__(
                 assert (
                     self.n_share_experts_fusion == self.tp_size
                 ), f"Shared experts fusion optimization is enabled in DeepSeek V3/R1, set it to {self.tp_size} to get the best optimized performance."
+        elif self.n_share_experts_fusion == 0:
+            if (
+                torch.cuda.get_device_capability("cuda") >= (9, 0)
+                and self.config.architectures[0] == "DeepseekV3ForCausalLM"
+                and self.config.n_routed_experts == 256
+                and (not global_server_args_dict["enable_deepep_moe"])
+            ):
+                self.n_share_experts_fusion = self.tp_size
+                global_server_args_dict["n_share_experts_fusion"] = self.tp_size
+                logger.info(
+                    "Deepseek V3/R1 with fp8 can use shared experts fusion optimization when SM version >=90. Shared experts fusion optimization is enabled."
+                )

         self.model = DeepseekV2Model(
             config, quant_config, prefix=add_prefix("model", prefix)
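
For reference, below is a minimal standalone sketch of the gating condition this diff adds: shared experts fusion is auto-enabled only on GPUs with SM version >= 90, for the DeepseekV3ForCausalLM architecture with 256 routed experts, and only when DeepEP MoE is not in use. The helper name should_fuse_shared_experts and its plain arguments are illustrative assumptions, not part of the patch.

    import torch

    # Sketch of the condition introduced by the added branch. The helper name
    # and argument list are assumptions made for illustration only.
    def should_fuse_shared_experts(
        architecture: str, n_routed_experts: int, enable_deepep_moe: bool
    ) -> bool:
        if not torch.cuda.is_available():
            return False
        # (major, minor) compute capability of the current CUDA device;
        # SM version >= 90 corresponds to major >= 9 (e.g. Hopper).
        major, _minor = torch.cuda.get_device_capability()
        return (
            major >= 9
            and architecture == "DeepseekV3ForCausalLM"
            and n_routed_experts == 256
            and not enable_deepep_moe
        )

On an SM 9.0 device with a DeepSeek V3/R1 config, this check passes, and the caller would then set n_share_experts_fusion to the tensor-parallel size, mirroring what the added branch does in __init__.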