forked from vllm-project/vllm
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_v2_offload.py
More file actions
31 lines (26 loc) · 998 Bytes
/
test_v2_offload.py
File metadata and controls
31 lines (26 loc) · 998 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Test V2 offloading correctness with DeepSeek V2 model."""
from ..utils import compare_two_settings
def test_v2_offload_deepseek():
    """Compare DeepSeek-V2-Lite outputs with and without V2 CPU offloading.

    Two settings are handed to ``compare_two_settings``:

    * V2 offloading: group_size=8, num_in_group=2, prefetch_step=1.
    * Baseline: no extra arguments, i.e. no offloading.

    This covers the advanced offloading path with prefetching on a MoE model.
    """
    model_name = "deepseek-ai/DeepSeek-V2-Lite"

    # V2 offloading configuration, as flag/value pairs.
    # No torch.compile flag is passed: torch.compile is automatically
    # disabled when V2 offloading is enabled (via enable_if in the
    # @support_torch_compile decorator).
    offload_args = [
        "--offload-group-size",
        "8",
        "--offload-num-in-group",
        "2",
        "--offload-prefetch-step",
        "1",
    ]

    # Baseline: an empty argument list means no offloading.
    baseline_args = []

    compare_two_settings(model_name, offload_args, baseline_args)