
Commit 9a557c8

JenZhao authored and nishith-fujitsu committed
[Benchmark] Allow oversample request in benchmark dataset (vllm-project#15170)
Signed-off-by: Jennifer Zhao <[email protected]>
1 parent 4c2672a commit 9a557c8
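
This change adds a `maybe_oversample_requests` helper to the `BenchmarkDataset` base class: when a dataset yields fewer samples than the requested `num_requests`, the sampled list is padded by drawing from it at random with replacement, seeded with the dataset's random seed so runs stay reproducible. The helper is wired into the ShareGPT, HuggingFace, and VisionArena samplers, the now-unused `DEFAULT_NUM_REQUESTS` class constants are dropped, and the benchmarks README gains HuggingFaceDataset serving examples.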

File tree

2 files changed: +139 -59 lines changed


benchmarks/README.md

Lines changed: 54 additions & 3 deletions
```diff
@@ -42,7 +42,7 @@ become available.
 </tr>
 <tr>
 <td><strong>HuggingFace</strong></td>
-<td style="text-align: center;"></td>
+<td style="text-align: center;">🟡</td>
 <td style="text-align: center;">🟡</td>
 <td>Specify your dataset path on HuggingFace</td>
 </tr>
```

```diff
@@ -60,8 +60,8 @@ become available.
 🚧: to be supported
 
 🟡: Partial support. Currently, HuggingFaceDataset only supports dataset formats
-similar to `lmms-lab/LLaVA-OneVision-Data`. If you need support for other dataset
-formats, please consider contributing.
+similar to `lmms-lab/LLaVA-OneVision-Data` and `Aeala/ShareGPT_Vicuna_unfiltered`.
+If you need support for other dataset formats, please consider contributing.
 
 **Note**: VisionArena’s `dataset-name` should be set to `hf`
```

````diff
@@ -139,6 +139,57 @@ python3 vllm/benchmarks/benchmark_serving.py \
   --num-prompts "${NUM_PROMPTS}"
 ```
 
+### HuggingFaceDataset Examples
+
+Currently, HuggingFaceDataset only supports dataset formats similar to
+`lmms-lab/LLaVA-OneVision-Data` and `Aeala/ShareGPT_Vicuna_unfiltered`.
+If you need support for other dataset formats, please consider contributing.
+
+```bash
+# need a model with vision capability here
+vllm serve Qwen/Qwen2-VL-7B-Instruct --disable-log-requests
+```
+
+**`lmms-lab/LLaVA-OneVision-Data`**
+
+```bash
+MODEL_NAME="Qwen/Qwen2-VL-7B-Instruct"
+NUM_PROMPTS=10
+BACKEND="openai-chat"
+DATASET_NAME="hf"
+DATASET_PATH="lmms-lab/LLaVA-OneVision-Data"
+DATASET_SPLIT='train'
+DATASET_SUBSET='chart2text(cauldron)'
+python3 vllm/benchmarks/benchmark_serving.py \
+  --backend "${BACKEND}" \
+  --model "${MODEL_NAME}" \
+  --endpoint "/v1/chat/completions" \
+  --dataset-name "${DATASET_NAME}" \
+  --dataset-path "${DATASET_PATH}" \
+  --hf-split "${DATASET_SPLIT}" \
+  --num-prompts "${NUM_PROMPTS}" \
+  --hf-subset "${DATASET_SUBSET}"
+```
+
+**`Aeala/ShareGPT_Vicuna_unfiltered`**
+
+```bash
+MODEL_NAME="Qwen/Qwen2-VL-7B-Instruct"
+NUM_PROMPTS=10
+BACKEND="openai-chat"
+DATASET_NAME="hf"
+DATASET_PATH="Aeala/ShareGPT_Vicuna_unfiltered"
+DATASET_SPLIT='train'
+python3 vllm/benchmarks/benchmark_serving.py \
+  --backend "${BACKEND}" \
+  --model "${MODEL_NAME}" \
+  --endpoint "/v1/chat/completions" \
+  --dataset-name "${DATASET_NAME}" \
+  --dataset-path "${DATASET_PATH}" \
+  --hf-split "${DATASET_SPLIT}" \
+  --num-prompts "${NUM_PROMPTS}"
+```
+
 ---
 ## Example - Offline Throughput Benchmark
````
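
Note: with the oversampling change in `benchmark_dataset.py` below, a run that requests more prompts than the chosen split/subset can supply no longer returns a short list; the sampled requests are duplicated at random (seeded) up to `--num-prompts`, and a log line reports that oversampling happened.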

benchmarks/benchmark_dataset.py

Lines changed: 85 additions & 56 deletions
```diff
@@ -17,6 +17,7 @@
 import base64
 import io
 import json
+import logging
 import random
 from abc import ABC, abstractmethod
 from collections.abc import Mapping
```

```diff
@@ -35,6 +36,8 @@
 from vllm.multimodal import MultiModalDataDict
 from vllm.transformers_utils.tokenizer import AnyTokenizer, get_lora_tokenizer
 
+logger = logging.getLogger(__name__)
+
 # -----------------------------------------------------------------------------
 # Data Classes
 # -----------------------------------------------------------------------------
```

```diff
@@ -61,9 +64,6 @@ class SampleRequest:
 class BenchmarkDataset(ABC):
     DEFAULT_SEED = 0
 
-    # num_requests has default 1000 in both the benchmark_serving.py and
-    # benchmark_throughput.py
-
     def __init__(
         self,
         dataset_path: Optional[str] = None,
```

```diff
@@ -90,8 +90,8 @@ def apply_multimodal_chat_transformation(
             mm_content: Optional[MultiModalDataDict] = None) -> list[dict]:
         """
         Transform a prompt and optional multimodal content into a chat format.
-        This method is used for chat models that expect a specific
-        conversation format.
+        This method is used for chat models that expect a specific conversation
+        format.
         """
         content = [{"text": prompt, "type": "text"}]
         if mm_content is not None:
```

```diff
@@ -101,10 +101,10 @@ def apply_multimodal_chat_transformation(
     def load_data(self) -> None:
         """
         Load data from the dataset path into self.data.
-
+
         This method must be overridden by subclasses since the method to load
         data will vary depending on the dataset format and source.
-
+
         Raises:
             NotImplementedError: If a subclass does not implement this method.
         """
```

```diff
@@ -121,18 +121,18 @@ def get_random_lora_request(
         """
         Optionally select a random LoRA request and return its associated
         tokenizer.
-
+
         This method is used when LoRA parameters are provided. It randomly
         selects a LoRA based on max_loras and retrieves a cached tokenizer for
         that LoRA if available. Otherwise, it returns the base tokenizer.
-
+
         Args:
             tokenizer (PreTrainedTokenizerBase): The base tokenizer to use if no
             LoRA is selected. max_loras (Optional[int]): The maximum number of
             LoRAs available. If None, LoRA is not used. lora_path
             (Optional[str]): Path to the LoRA parameters on disk. If None, LoRA
             is not used.
-
+
         Returns:
             tuple[Optional[LoRARequest], AnyTokenizer]: A tuple where the first
             element is a LoRARequest (or None if not applicable) and the second
```

```diff
@@ -160,21 +160,39 @@ def sample(self, tokenizer: PreTrainedTokenizerBase,
                num_requests: int) -> list[SampleRequest]:
         """
         Abstract method to generate sample requests from the dataset.
-
+
         Subclasses must override this method to implement dataset-specific logic
         for generating a list of SampleRequest objects.
-
+
         Args:
             tokenizer (PreTrainedTokenizerBase): The tokenizer to be used
                 for processing the dataset's text.
             num_requests (int): The number of sample requests to generate.
-
+
         Returns:
             list[SampleRequest]: A list of sample requests generated from the
             dataset.
         """
         raise NotImplementedError("sample must be implemented in subclasses.")
 
+    def maybe_oversample_requests(self, requests: list[SampleRequest],
+                                  num_requests: int) -> None:
+        """
+        Oversamples the list of requests if its size is less than the desired
+        number.
+
+        Args:
+            requests (List[SampleRequest]): The current list of sampled
+            requests. num_requests (int): The target number of requests.
+        """
+        if len(requests) < num_requests:
+            random.seed(self.random_seed)
+            additional = random.choices(requests,
+                                        k=num_requests - len(requests))
+            requests.extend(additional)
+            logger.info("Oversampled requests to reach %d total samples.",
+                        num_requests)
+
 
 # -----------------------------------------------------------------------------
 # Utility Functions and Global Caches
```
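
The new helper above is the heart of the change. As a minimal standalone sketch of the same logic (the function name and the toy request list here are illustrative, not part of the commit):

```python
import random

# Illustrative sketch of the behavior maybe_oversample_requests adds:
# if sampling produced fewer items than requested, top the list up by
# drawing from it with replacement, seeded so repeated benchmark runs
# pick the same duplicates.
def pad_with_oversampling(requests: list, num_requests: int, seed: int = 0) -> None:
    if len(requests) < num_requests:
        random.seed(seed)
        additional = random.choices(requests, k=num_requests - len(requests))
        requests.extend(additional)  # mutates the list in place, like the helper

samples = ["req-a", "req-b", "req-c"]
pad_with_oversampling(samples, num_requests=6)
print(samples)  # 6 items: the original 3 plus 3 seeded random duplicates
```

Note that, like `load_data` in the samplers below, the helper reseeds the module-level `random` generator rather than using a private `random.Random` instance, so oversampling is deterministic for a given seed but resets global RNG state.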

```diff
@@ -276,15 +294,16 @@ def __init__(
     ) -> None:
         super().__init__(**kwargs)
 
-    def sample(self,
-               tokenizer: PreTrainedTokenizerBase,
-               num_requests: int,
-               prefix_len: int = DEFAULT_PREFIX_LEN,
-               range_ratio: float = DEFAULT_RANGE_RATIO,
-               input_len: int = DEFAULT_INPUT_LEN,
-               output_len: int = DEFAULT_OUTPUT_LEN,
-               **kwargs) -> list[SampleRequest]:
-
+    def sample(
+        self,
+        tokenizer: PreTrainedTokenizerBase,
+        num_requests: int,
+        prefix_len: int = DEFAULT_PREFIX_LEN,
+        range_ratio: float = DEFAULT_RANGE_RATIO,
+        input_len: int = DEFAULT_INPUT_LEN,
+        output_len: int = DEFAULT_OUTPUT_LEN,
+        **kwargs,
+    ) -> list[SampleRequest]:
         vocab_size = tokenizer.vocab_size
 
         prefix_token_ids = (np.random.randint(
```

```diff
@@ -346,20 +365,24 @@ def load_data(self) -> None:
         random.seed(self.random_seed)
         random.shuffle(self.data)
 
-    def sample(self,
-               tokenizer: PreTrainedTokenizerBase,
-               num_requests: int,
-               lora_path: Optional[str] = None,
-               max_loras: Optional[int] = None,
-               output_len: Optional[int] = None,
-               enable_multimodal_chat: bool = False,
-               **kwargs) -> list:
+    def sample(
+        self,
+        tokenizer: PreTrainedTokenizerBase,
+        num_requests: int,
+        lora_path: Optional[str] = None,
+        max_loras: Optional[int] = None,
+        output_len: Optional[int] = None,
+        enable_multimodal_chat: bool = False,
+        **kwargs,
+    ) -> list:
         samples: list = []
         for entry in self.data:
             if len(samples) >= num_requests:
                 break
-            prompt, completion = entry["conversations"][0]["value"],\
-                entry["conversations"][1]["value"]
+            prompt, completion = (
+                entry["conversations"][0]["value"],
+                entry["conversations"][1]["value"],
+            )
 
             lora_request, tokenizer = self.get_random_lora_request(
                 tokenizer=tokenizer, max_loras=max_loras, lora_path=lora_path)
```

```diff
@@ -383,6 +406,7 @@ def sample(self,
                     expected_output_len=new_output_len,
                     lora_request=lora_request,
                 ))
+        self.maybe_oversample_requests(samples, num_requests)
         return samples
 
 
```
```diff
@@ -415,19 +439,20 @@ def load_data(self) -> None:
         with open(self.dataset_path, encoding="utf-8") as f:
             self.data = f.readlines()
 
-    def sample(self,
-               tokenizer,
-               num_requests: int,
-               prefix_len: int = DEFAULT_PREFIX_LEN,
-               input_len: int = DEFAULT_INPUT_LEN,
-               output_len: int = DEFAULT_OUTPUT_LEN,
-               return_prompt_formatted: bool = False,
-               **kwargs) -> list:
+    def sample(
+        self,
+        tokenizer,
+        num_requests: int,
+        prefix_len: int = DEFAULT_PREFIX_LEN,
+        input_len: int = DEFAULT_INPUT_LEN,
+        output_len: int = DEFAULT_OUTPUT_LEN,
+        return_prompt_formatted: bool = False,
+        **kwargs,
+    ) -> list:
         # Calculate average token length for a poem line.
         tokenized_lines = [tokenizer(line).input_ids for line in self.data]
         avg_len = sum(len(tokens)
-                      for tokens in \
-                      tokenized_lines) / len(tokenized_lines)
+                      for tokens in tokenized_lines) / len(tokenized_lines)
 
         # Build the base prompt.
         base_prompt = "Pick as many lines as you can from these poem lines:\n"
```

```diff
@@ -506,12 +531,14 @@ def _sample_loaded_data(self, num_requests: int) -> list:
         # Convert the dataframe to a list of lists.
         return data.values.tolist()
 
-    def sample(self,
-               tokenizer: PreTrainedTokenizerBase,
-               num_requests: int,
-               max_loras: Optional[int] = None,
-               lora_path: Optional[str] = None,
-               **kwargs) -> list[SampleRequest]:
+    def sample(
+        self,
+        tokenizer: PreTrainedTokenizerBase,
+        num_requests: int,
+        max_loras: Optional[int] = None,
+        lora_path: Optional[str] = None,
+        **kwargs,
+    ) -> list[SampleRequest]:
         samples = []
         data = self._sample_loaded_data(num_requests=num_requests)
         for i in range(num_requests):
```

```diff
@@ -544,7 +571,6 @@ class HuggingFaceDataset(BenchmarkDataset):
     Dataset class for processing a HuggingFace dataset with conversation data
     and optional images.
     """
-    DEFAULT_NUM_REQUESTS = 1000
 
     def __init__(
         self,
```

```diff
@@ -618,6 +644,7 @@ def sample(self,
                     expected_output_len=output_len,
                     multi_modal_data=mm_content,
                 ))
+        self.maybe_oversample_requests(sampled_requests, num_requests)
         return sampled_requests
 
 
```

```diff
@@ -632,7 +659,6 @@ class VisionArenaDataset(HuggingFaceDataset):
     """
 
     DEFAULT_OUTPUT_LEN = 128
-    DEFAULT_NUM_REQUESTS = 1000
     VISION_ARENA_DATASET_PATH = "lmarena-ai/vision-arena-bench-v0.1"
 
     def __init__(
```

```diff
@@ -657,12 +683,14 @@ def load_data(self) -> None:
         )
         self.data = dataset.shuffle(seed=self.random_seed)
 
-    def sample(self,
-               tokenizer: PreTrainedTokenizerBase,
-               num_requests: int,
-               output_len: Optional[int] = None,
-               enable_multimodal_chat: bool = False,
-               **kwargs) -> list:
+    def sample(
+        self,
+        tokenizer: PreTrainedTokenizerBase,
+        num_requests: int,
+        output_len: Optional[int] = None,
+        enable_multimodal_chat: bool = False,
+        **kwargs,
+    ) -> list:
         output_len = (output_len
                       if output_len is not None else self.DEFAULT_OUTPUT_LEN)
         sampled_requests = []
```

```diff
@@ -685,4 +713,5 @@ def sample(self,
                     expected_output_len=output_len,
                     multi_modal_data=mm_content,
                 ))
+        self.maybe_oversample_requests(sampled_requests, num_requests)
         return sampled_requests
```
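
This is the last of the three call sites: `ShareGPTDataset`, `HuggingFaceDataset`, and `VisionArenaDataset` now all pad their output to `num_requests`, while the random, sonnet, and BurstGPT samplers are only touched for signature formatting in this commit.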
