@@ -9,6 +9,7 @@
 
 from vllm.platforms import current_platform
 
+from ...registry import HF_EXAMPLE_MODELS
 from ...utils import check_logprobs_close
 
 # These have unsupported head_dim for FA. We do not
@@ -33,54 +34,50 @@
 
 # @maybe_test_rocm_aiter
 @pytest.mark.parametrize(
-    "model",
+    "model_arch",
     [
         pytest.param(
-            "bigscience/bloom-560m",  # bloom - testing alibi slopes
+            "BloomForCausalLM",  # testing alibi slopes
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "openai-community/gpt2",  # gpt2
+            "GPT2LMHeadModel",  # gpt2
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
-        pytest.param("Milos/slovak-gpt-j-405M"),  # gptj
-        pytest.param("bigcode/tiny_starcoder_py"),  # gpt_bigcode
-        pytest.param("EleutherAI/pythia-70m"),  # gpt_neox
+        pytest.param("GPTJForCausalLM"),
+        pytest.param("GPTBigCodeForCausalLM"),
+        pytest.param("GPTNeoXForCausalLM"),
         pytest.param(
-            "google/gemma-1.1-2b-it",  # gemma
+            "GemmaForCausalLM",  # gemma
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
+        pytest.param("GlmForCausalLM"),
         pytest.param(
-            "THUDM/chatglm3-6b",  # chatglm (text-only)
-        ),
-        pytest.param(
-            "meta-llama/Llama-3.2-1B-Instruct",  # llama
+            "LlamaForCausalLM",
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "openbmb/MiniCPM3-4B",
+            "MiniCPM3ForCausalLM",
             # fused_moe not supported on CPU
             marks=[pytest.mark.core_model],
         ),
         pytest.param(
-            "facebook/opt-125m",  # opt
+            "OPTForCausalLM",
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "microsoft/phi-2",  # phi
+            "PhiForCausalLM",
             marks=[pytest.mark.core_model],
         ),
| 72 | + pytest.param("QWenLMHeadModel", ), |
         pytest.param(
-            "Qwen/Qwen-7B",  # qwen (text-only)
-        ),
-        pytest.param(
-            "Qwen/Qwen2.5-0.5B-Instruct",  # qwen2
+            "Qwen2ForCausalLM",
             marks=[pytest.mark.core_model],
         ),
-        pytest.param("stabilityai/stablelm-3b-4e1t"),  # stablelm
-        pytest.param("bigcode/starcoder2-3b"),  # starcoder2
+        pytest.param("StableLmForCausalLM"),
+        pytest.param("Starcoder2ForCausalLM"),
         pytest.param(
-            "ehristoforu/Falcon3-MoE-2x7B-Insruct",  # mixtral
+            "MixtralForCausalLM",
             marks=[pytest.mark.cpu_model],
         )
     ])
@@ -89,10 +86,12 @@
 @pytest.mark.parametrize("num_logprobs", [5])
 @pytest.mark.parametrize(
     "use_rocm_aiter", [True, False] if current_platform.is_rocm() else [False])
-def test_models(hf_runner, vllm_runner, example_prompts, model: str,
+def test_models(hf_runner, vllm_runner, example_prompts, model_arch: str,
                 dtype: str, max_tokens: int, num_logprobs: int,
                 use_rocm_aiter: bool, monkeypatch) -> None:
 
+    model = HF_EXAMPLE_MODELS.get_hf_info(model_arch).default
+
     if model in REQUIRES_V0:
         monkeypatch.setenv("VLLM_USE_V1", "0")
 
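For reference, the `HF_EXAMPLE_MODELS.get_hf_info(model_arch).default` lookup added in the test body resolves an architecture name to a default Hugging Face checkpoint, so the parametrize list can name architectures instead of concrete repos. Below is a minimal sketch of that pattern, assuming a dataclass record keyed by architecture name; apart from the `get_hf_info(...).default` call shape and the two checkpoint ids (taken from the removed lines above), the class and field layout is illustrative, not vLLM's actual `registry` module.

```python
# Sketch: architecture-name -> default-checkpoint registry (illustrative).
from dataclasses import dataclass


@dataclass(frozen=True)
class _HfExampleInfo:
    # Default HF checkpoint to load when a test names only the architecture.
    default: str


class _HfExampleModels:

    def __init__(self, models: dict) -> None:
        self._models = models

    def get_hf_info(self, model_arch: str) -> _HfExampleInfo:
        # Fail loudly if a parametrized architecture has no registry entry.
        if model_arch not in self._models:
            raise ValueError(f"No example model registered for {model_arch!r}")
        return self._models[model_arch]


HF_EXAMPLE_MODELS = _HfExampleModels({
    # Checkpoint ids taken from the removed lines of the diff above.
    "BloomForCausalLM": _HfExampleInfo(default="bigscience/bloom-560m"),
    "GPT2LMHeadModel": _HfExampleInfo(default="openai-community/gpt2"),
})

# Mirrors the lookup in the updated test body.
model = HF_EXAMPLE_MODELS.get_hf_info("BloomForCausalLM").default
assert model == "bigscience/bloom-560m"
```

The apparent benefit of keying tests on architecture names is that the parametrize list stays stable while the registry can swap in a smaller or different default checkpoint for every test in one place.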