Clean up unnecessary changes: revert naive_dp_ep.py, gdn_linear_attn.py, and qwen3_moe.py; remove extra blank line in all2all.py

Copilot · hongbolv · web-flow · commit 7162b5915c2f · 2026-05-12T01:38:02.000Z
Agent-Logs-Url: https://github.com/hongbolv/vllm/sessions/c7d750b8-970e-44be-ba41-bf329ff4a67a
Co-authored-by: hongbolv <33214277+hongbolv@users.noreply.github.com>
diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py
@@ -137,7 +137,6 @@ def combine(
         dist.barrier(group=dist_group.device_group)
 
         hidden_states = dist_group.reduce_scatterv(hidden_states, dim=0, sizes=sizes)
-
         return hidden_states
 
     def destroy(self):
diff --git a/vllm/model_executor/layers/fused_moe/prepare_finalize/naive_dp_ep.py b/vllm/model_executor/layers/fused_moe/prepare_finalize/naive_dp_ep.py
@@ -3,7 +3,7 @@
 import torch
 
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
-from vllm.distributed import get_dp_group, get_ep_group
+from vllm.distributed import get_ep_group
 from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
 from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
     TopKWeightAndReduceContiguous,
diff --git a/vllm/model_executor/layers/mamba/gdn_linear_attn.py b/vllm/model_executor/layers/mamba/gdn_linear_attn.py
@@ -528,7 +528,6 @@ def forward_cuda(
         3. Output projection
         """
         num_tokens = hidden_states.size(0)
-
         # ============================================================
         # Part 1: Input Projection
         # ============================================================
@@ -591,7 +590,6 @@ def forward_cuda(
         core_attn_out = core_attn_out.reshape(-1, core_attn_out.shape[-1])
         z = z.reshape(-1, z.shape[-1])
         core_attn_out = self.norm(core_attn_out, z)
-
         core_attn_out = core_attn_out.reshape(z_shape_og)
         core_attn_out = rearrange(core_attn_out, "... h d -> ... (h d)")
         output[:num_tokens], _ = self.out_proj(core_attn_out)
@@ -643,7 +641,6 @@ def forward_xpu(
         core_attn_out = core_attn_out.reshape(-1, core_attn_out.shape[-1])
         z = z.reshape(-1, z.shape[-1])
         core_attn_out = self.norm(core_attn_out, z)
-
         core_attn_out = core_attn_out.reshape(z_shape_og)
         core_attn_out = rearrange(core_attn_out, "... h d -> ... (h d)")
         output[:num_tokens], _ = self.out_proj(core_attn_out)
diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py
@@ -391,7 +391,6 @@ def __init__(self, vllm_config: VllmConfig, prefix: str = "") -> None:
 
         # `mlp_only_layers` in the config.
         layer_idx = extract_layer_index(prefix)
-        self.layer_idx = layer_idx
         mlp_only_layers = (
             [] if not hasattr(config, "mlp_only_layers") else config.mlp_only_layers
         )

Original file line number	Diff line number	Diff line change
`@@ -391,7 +391,6 @@ def __init__(self, vllm_config: VllmConfig, prefix: str = "") -> None:`
`391`	`391`
`392`	`392`	`` # `mlp_only_layers` in the config. ``
`393`	`393`	`layer_idx = extract_layer_index(prefix)`
`394`		`- self.layer_idx = layer_idx`
`395`	`394`	`mlp_only_layers = (`
`396`	`395`	`[] if not hasattr(config, "mlp_only_layers") else config.mlp_only_layers`
`397`	`396`	`)`