Skip to content

Commit e0bba86

Browse files
merrymercy authored and thyecust committed
Fix 2-gpu CI test and suppress some warnings (sgl-project#4930)
1 parent a67c15e commit e0bba86

File tree

4 files changed, with 21 additions and 19 deletions.

python/sglang/srt/models/deepseek_v2.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,9 +30,6 @@
3030
tensor_model_parallel_all_reduce,
3131
)
3232
from sglang.srt.layers.activation import SiluAndMul
33-
from sglang.srt.layers.attention.triton_ops.rocm_mla_decode_rope import (
34-
decode_attention_fwd_grouped_rope,
35-
)
3633
from sglang.srt.layers.dp_attention import (
3734
dp_gather_partial,
3835
dp_scatter,
@@ -83,6 +80,11 @@
8380
else:
8481
from vllm import _custom_ops as ops
8582

83+
if _is_hip:
84+
from sglang.srt.layers.attention.triton_ops.rocm_mla_decode_rope import (
85+
decode_attention_fwd_grouped_rope,
86+
)
87+
8688
expert_distribution_recorder = ExpertDistributionRecorder()
8789

8890

python/sglang/srt/utils.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -564,6 +564,10 @@ def load_image(image_file: Union[str, bytes]) -> tuple[Image, tuple[int, int]]:
564564

565565

566566
def suppress_other_loggers():
567+
warnings.filterwarnings(
568+
"ignore", category=UserWarning, message="The given NumPy array is not writable"
569+
)
570+
567571
try:
568572
from vllm.logger import logger as vllm_default_logger
569573
except ImportError:
@@ -578,10 +582,6 @@ def suppress_other_loggers():
578582
)
579583
logging.getLogger("vllm.config").setLevel(logging.ERROR)
580584

581-
warnings.filterwarnings(
582-
"ignore", category=UserWarning, message="The given NumPy array is not writable"
583-
)
584-
585585

586586
def assert_pkg_version(pkg: str, min_version: str, message: str):
587587
try:

test/srt/run_suite.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ class TestFile:
1515
"per-commit": [
1616
TestFile("models/lora/test_lora.py", 76),
1717
TestFile("models/lora/test_lora_backend.py", 420),
18-
TestFile("models/lora/test_multi_lora_backend.py", 144),
18+
TestFile("models/lora/test_multi_lora_backend.py", 60),
1919
TestFile("models/test_embedding_models.py", 35),
2020
TestFile("models/test_generation_models.py", 103),
2121
TestFile("models/test_grok_models.py", 60),
@@ -28,15 +28,15 @@ class TestFile:
2828
TestFile("test_chunked_prefill.py", 336),
2929
TestFile("test_eagle_infer.py", 500),
3030
TestFile("test_ebnf_constrained.py"),
31-
TestFile("test_fp8_kernel.py", 2),
31+
TestFile("test_fp8_kernel.py", 8),
3232
TestFile("test_embedding_openai_server.py", 36),
3333
TestFile("test_hidden_states.py", 55),
34-
TestFile("test_int8_kernel.py", 1),
34+
TestFile("test_int8_kernel.py", 8),
3535
TestFile("test_input_embeddings.py", 38),
3636
TestFile("test_json_constrained.py", 98),
3737
TestFile("test_large_max_new_tokens.py", 41),
3838
TestFile("test_metrics.py", 32),
39-
TestFile("test_mla.py", 92),
39+
TestFile("test_mla.py", 162),
4040
TestFile("test_mla_deepseek_v3.py", 221),
4141
TestFile("test_mla_int8_deepseek_v3.py", 522),
4242
TestFile("test_mla_flashinfer.py", 395),
@@ -68,23 +68,23 @@ class TestFile:
6868
TestFile("test_vertex_endpoint.py", 31),
6969
TestFile("test_vision_chunked_prefill.py", 223),
7070
TestFile("test_vlm_accuracy.py", 60),
71-
TestFile("test_vision_openai_server.py", 344),
72-
TestFile("test_fim_completion.py", 120),
71+
TestFile("test_vision_openai_server.py", 537),
72+
TestFile("test_fim_completion.py", 40),
7373
TestFile("test_w8a8_quantization.py", 46),
74-
TestFile("test_eval_fp8_accuracy.py", 172),
74+
TestFile("test_eval_fp8_accuracy.py", 303),
7575
TestFile("test_create_kvindices.py", 2),
7676
TestFile("test_hicache.py", 60),
7777
TestFile("test_hicache_mla.py", 90),
7878
],
7979
"per-commit-2-gpu": [
80+
TestFile("models/lora/test_lora_tp.py", 300),
8081
TestFile("test_data_parallelism.py", 90),
8182
TestFile("test_dp_attention.py", 90),
83+
TestFile("test_mla_tp.py", 420),
84+
TestFile("test_moe_ep.py", 220),
85+
TestFile("test_patch_torch.py", 30),
8286
TestFile("test_update_weights_from_distributed.py", 100),
8387
TestFile("test_verl_engine.py", 100),
84-
TestFile("test_patch_torch.py", 30),
85-
TestFile("test_moe_ep.py", 220),
86-
TestFile("test_mla_tp.py", 420),
87-
TestFile("test_lora_tp.py", 300),
8888
],
8989
"nightly": [
9090
TestFile("test_nightly_gsm8k_eval.py"),

test/srt/test_eagle_infer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -567,7 +567,7 @@ def setUpClass(cls):
567567
"--max-running-requests",
568568
8,
569569
"--page-size",
570-
4,
570+
8,
571571
],
572572
)
573573

0 commit comments

Comments (0)