@@ -36,6 +36,7 @@
     RowParallelLinear,
 )
 from sglang.srt.layers.logits_processor import LogitsProcessor
+from sglang.srt.layers.moe.ep_moe.layer import EPMoE
 from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.radix_attention import RadixAttention
@@ -45,6 +46,7 @@
     VocabParallelEmbedding,
 )
 from sglang.srt.managers.expert_distribution import ExpertDistributionRecorder
+from sglang.srt.managers.schedule_batch import global_server_args_dict
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
 from sglang.srt.model_loader.weight_utils import default_weight_loader
 from sglang.srt.utils import add_prefix, make_layers
@@ -108,12 +110,13 @@ def __init__(
                 f"the number of experts {config.num_experts}."
             )
 
-        self.experts = FusedMoE(
+        MoEImpl = EPMoE if global_server_args_dict["enable_ep_moe"] else FusedMoE
+
+        self.experts = MoEImpl(
             num_experts=config.num_experts,
             top_k=config.num_experts_per_tok,
             hidden_size=config.hidden_size,
             intermediate_size=config.moe_intermediate_size,
-            reduce_results=False,
             renormalize=config.norm_topk_prob,
             quant_config=quant_config,
             prefix=add_prefix("experts", prefix),
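The hunk above replaces a hard-coded FusedMoE constructor with a class chosen at runtime from the global server arguments. A minimal, self-contained sketch of the same dispatch pattern follows; the class and flag names are illustrative stand-ins, not SGLang's API (the real code picks between EPMoE and FusedMoE via global_server_args_dict["enable_ep_moe"]):

```python
# Illustration of the runtime class-selection pattern used in the hunk above.
# `DenseMoE`, `ExpertParallelMoE`, and `server_args` are made-up stand-ins.

class DenseMoE:
    """Stand-in for a tensor-parallel fused MoE layer."""

    def __init__(self, num_experts: int, top_k: int):
        self.num_experts = num_experts
        self.top_k = top_k


class ExpertParallelMoE(DenseMoE):
    """Stand-in for an expert-parallel MoE layer with the same constructor."""


server_args = {"enable_ep_moe": True}

# Resolve the implementation once, then construct it with identical keyword
# arguments, exactly as the diff does with MoEImpl(...).
MoEImpl = ExpertParallelMoE if server_args["enable_ep_moe"] else DenseMoE
experts = MoEImpl(num_experts=64, top_k=8)
```

Resolving the class into a local MoEImpl keeps the constructor call identical for both implementations; note that the diff also drops the reduce_results=False argument, so whichever class is selected no longer receives it.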
@@ -427,7 +430,9 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
             ("gate_up_proj", "up_proj", 1),
         ]
 
-        expert_params_mapping = FusedMoE.make_expert_params_mapping(
+        MoEImpl = EPMoE if global_server_args_dict["enable_ep_moe"] else FusedMoE
+
+        expert_params_mapping = MoEImpl.make_expert_params_mapping(
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
             ckpt_up_proj_name="up_proj",
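The EPMoE-vs-FusedMoE selection is repeated in load_weights, presumably so that the expert-parameter mapping is produced by the same class that instantiated self.experts in __init__. One hypothetical way to avoid duplicating the check is sketched below; the helper name resolve_moe_impl is an assumption, not part of this PR:

```python
# Sketch only: a hypothetical helper that centralizes the EPMoE/FusedMoE
# selection so __init__ and load_weights stay in sync. Not part of this PR.
from sglang.srt.layers.moe.ep_moe.layer import EPMoE
from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
from sglang.srt.managers.schedule_batch import global_server_args_dict


def resolve_moe_impl():
    """Return the MoE layer class selected by the current server arguments."""
    return EPMoE if global_server_args_dict["enable_ep_moe"] else FusedMoE
```

Both call sites could then use MoEImpl = resolve_moe_impl(), keeping the weight-loading mapping consistent with the layer type created in __init__.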