import base64
import io
import json
+import logging
import random
from abc import ABC, abstractmethod
from collections.abc import Mapping
@@ -35,6 +36,8 @@
from vllm.multimodal import MultiModalDataDict
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_lora_tokenizer

+logger = logging.getLogger(__name__)
+
# -----------------------------------------------------------------------------
# Data Classes
# -----------------------------------------------------------------------------
@@ -61,9 +64,6 @@ class SampleRequest:
class BenchmarkDataset(ABC):
    DEFAULT_SEED = 0

-    # num_requests has default 1000 in both the benchmark_serving.py and
-    # benchmark_throughput.py
-
    def __init__(
        self,
        dataset_path: Optional[str] = None,
@@ -90,8 +90,8 @@ def apply_multimodal_chat_transformation(
            mm_content: Optional[MultiModalDataDict] = None) -> list[dict]:
        """
        Transform a prompt and optional multimodal content into a chat format.
-        This method is used for chat models that expect a specific
-        conversation format.
+        This method is used for chat models that expect a specific conversation
+        format.
        """
        content = [{"text": prompt, "type": "text"}]
        if mm_content is not None:
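As a reading aid between hunks (not part of the diff): the transformation above wraps the prompt as a text part and appends any multimodal part, yielding a single user message in an OpenAI-style envelope. A minimal sketch, where the `image_url` part and the `messages` wrapper are illustrative assumptions:

```python
prompt = "Describe this image."
# Hypothetical multimodal part; the real MultiModalDataDict comes from vllm.
mm_content = {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}

content = [{"text": prompt, "type": "text"}]  # mirrors the line above
if mm_content is not None:
    content.append(mm_content)
# Assumed envelope: one user turn carrying both parts.
messages = [{"role": "user", "content": content}]
```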
@@ -101,10 +101,10 @@ def apply_multimodal_chat_transformation(
    def load_data(self) -> None:
        """
        Load data from the dataset path into self.data.
-
+
        This method must be overridden by subclasses since the method to load
        data will vary depending on the dataset format and source.
-
+
        Raises:
            NotImplementedError: If a subclass does not implement this method.
        """
@@ -121,18 +121,18 @@ def get_random_lora_request(
        """
        Optionally select a random LoRA request and return its associated
        tokenizer.
-
+
        This method is used when LoRA parameters are provided. It randomly
        selects a LoRA based on max_loras and retrieves a cached tokenizer for
        that LoRA if available. Otherwise, it returns the base tokenizer.
-
+
        Args:
            tokenizer (PreTrainedTokenizerBase): The base tokenizer to use if no
            LoRA is selected. max_loras (Optional[int]): The maximum number of
            LoRAs available. If None, LoRA is not used. lora_path
            (Optional[str]): Path to the LoRA parameters on disk. If None, LoRA
            is not used.
-
+
        Returns:
            tuple[Optional[LoRARequest], AnyTokenizer]: A tuple where the first
            element is a LoRARequest (or None if not applicable) and the second
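For intuition (illustrative, not the vllm code): the behaviour documented above reduces to drawing a uniform adapter id and memoizing one tokenizer per adapter. A standalone sketch with a dict cache and string stand-ins for the tokenizers:

```python
import random
from typing import Optional

lora_tokenizer_cache: dict[int, str] = {}  # hypothetical cache: lora_id -> tokenizer

def pick_random_lora(base_tokenizer: str, max_loras: Optional[int],
                     lora_path: Optional[str]) -> tuple[Optional[int], str]:
    if max_loras is None or lora_path is None:
        return None, base_tokenizer  # LoRA not used: keep the base tokenizer
    lora_id = random.randint(1, max_loras)  # assumed 1-based adapter ids
    if lora_id not in lora_tokenizer_cache:
        # Stand-in for get_lora_tokenizer(); cache one tokenizer per adapter.
        lora_tokenizer_cache[lora_id] = f"{lora_path}/{lora_id}-tokenizer"
    return lora_id, lora_tokenizer_cache[lora_id]
```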
@@ -160,21 +160,39 @@ def sample(self, tokenizer: PreTrainedTokenizerBase,
               num_requests: int) -> list[SampleRequest]:
        """
        Abstract method to generate sample requests from the dataset.
-
+
        Subclasses must override this method to implement dataset-specific logic
        for generating a list of SampleRequest objects.
-
+
        Args:
            tokenizer (PreTrainedTokenizerBase): The tokenizer to be used
            for processing the dataset's text.
            num_requests (int): The number of sample requests to generate.
-
+
        Returns:
            list[SampleRequest]: A list of sample requests generated from the
            dataset.
        """
        raise NotImplementedError("sample must be implemented in subclasses.")

+    def maybe_oversample_requests(self, requests: list[SampleRequest],
+                                  num_requests: int) -> None:
+        """
+        Oversamples the list of requests if its size is less than the desired
+        number.
+
+        Args:
+            requests (List[SampleRequest]): The current list of sampled
+            requests. num_requests (int): The target number of requests.
+        """
+        if len(requests) < num_requests:
+            random.seed(self.random_seed)
+            additional = random.choices(requests,
+                                        k=num_requests - len(requests))
+            requests.extend(additional)
+            logger.info("Oversampled requests to reach %d total samples.",
+                        num_requests)
+
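Because `maybe_oversample_requests` pads with `random.choices` (sampling with replacement), a short dataset is topped up with repeated entries rather than failing. A quick self-contained check of that behaviour, with plain ints standing in for `SampleRequest` objects:

```python
import random

requests = [1, 2, 3]          # stand-ins for SampleRequest objects
num_requests = 8              # desired sample count
if len(requests) < num_requests:
    random.seed(0)            # mirrors seeding with self.random_seed
    requests.extend(random.choices(requests, k=num_requests - len(requests)))
assert len(requests) == num_requests  # padded to the target, repeats allowed
```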

# -----------------------------------------------------------------------------
# Utility Functions and Global Caches
@@ -276,15 +294,16 @@ def __init__(
    ) -> None:
        super().__init__(**kwargs)

-    def sample(self,
-               tokenizer: PreTrainedTokenizerBase,
-               num_requests: int,
-               prefix_len: int = DEFAULT_PREFIX_LEN,
-               range_ratio: float = DEFAULT_RANGE_RATIO,
-               input_len: int = DEFAULT_INPUT_LEN,
-               output_len: int = DEFAULT_OUTPUT_LEN,
-               **kwargs) -> list[SampleRequest]:
-
+    def sample(
+        self,
+        tokenizer: PreTrainedTokenizerBase,
+        num_requests: int,
+        prefix_len: int = DEFAULT_PREFIX_LEN,
+        range_ratio: float = DEFAULT_RANGE_RATIO,
+        input_len: int = DEFAULT_INPUT_LEN,
+        output_len: int = DEFAULT_OUTPUT_LEN,
+        **kwargs,
+    ) -> list[SampleRequest]:
        vocab_size = tokenizer.vocab_size

        prefix_token_ids = (np.random.randint(
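The trailing context line above seeds the random prompts: prefix token ids are drawn uniformly from the tokenizer vocabulary and shared across requests. A standalone sketch of that sampling step (the concrete vocab size and lengths are illustrative, and the use of `range_ratio` for per-request length jitter is an assumption about code not shown in this hunk):

```python
import numpy as np

vocab_size = 32_000            # illustrative stand-in for tokenizer.vocab_size
prefix_len, input_len = 8, 64  # illustrative defaults
range_ratio = 1.0              # 1.0 => no jitter in sampled lengths

# Shared random prefix, mirroring the np.random.randint call above.
prefix_token_ids = np.random.randint(0, vocab_size, size=prefix_len).tolist()

# Assumed role of range_ratio: per-request input lengths drawn from
# [input_len * range_ratio, input_len].
input_lens = np.random.randint(int(input_len * range_ratio), input_len + 1,
                               size=4)
print(len(prefix_token_ids), input_lens)
```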
@@ -346,20 +365,24 @@ def load_data(self) -> None:
        random.seed(self.random_seed)
        random.shuffle(self.data)

-    def sample(self,
-               tokenizer: PreTrainedTokenizerBase,
-               num_requests: int,
-               lora_path: Optional[str] = None,
-               max_loras: Optional[int] = None,
-               output_len: Optional[int] = None,
-               enable_multimodal_chat: bool = False,
-               **kwargs) -> list:
+    def sample(
+        self,
+        tokenizer: PreTrainedTokenizerBase,
+        num_requests: int,
+        lora_path: Optional[str] = None,
+        max_loras: Optional[int] = None,
+        output_len: Optional[int] = None,
+        enable_multimodal_chat: bool = False,
+        **kwargs,
+    ) -> list:
        samples: list = []
        for entry in self.data:
            if len(samples) >= num_requests:
                break
-            prompt, completion = entry["conversations"][0]["value"],\
-                entry["conversations"][1]["value"]
+            prompt, completion = (
+                entry["conversations"][0]["value"],
+                entry["conversations"][1]["value"],
+            )

            lora_request, tokenizer = self.get_random_lora_request(
                tokenizer=tokenizer, max_loras=max_loras, lora_path=lora_path)
@@ -383,6 +406,7 @@ def sample(self,
                expected_output_len=new_output_len,
                lora_request=lora_request,
            ))
+        self.maybe_oversample_requests(samples, num_requests)
        return samples

@@ -415,19 +439,20 @@ def load_data(self) -> None:
        with open(self.dataset_path, encoding="utf-8") as f:
            self.data = f.readlines()

-    def sample(self,
-               tokenizer,
-               num_requests: int,
-               prefix_len: int = DEFAULT_PREFIX_LEN,
-               input_len: int = DEFAULT_INPUT_LEN,
-               output_len: int = DEFAULT_OUTPUT_LEN,
-               return_prompt_formatted: bool = False,
-               **kwargs) -> list:
+    def sample(
+        self,
+        tokenizer,
+        num_requests: int,
+        prefix_len: int = DEFAULT_PREFIX_LEN,
+        input_len: int = DEFAULT_INPUT_LEN,
+        output_len: int = DEFAULT_OUTPUT_LEN,
+        return_prompt_formatted: bool = False,
+        **kwargs,
+    ) -> list:
        # Calculate average token length for a poem line.
        tokenized_lines = [tokenizer(line).input_ids for line in self.data]
        avg_len = sum(len(tokens)
-                      for tokens in \
-                      tokenized_lines) / len(tokenized_lines)
+                      for tokens in tokenized_lines) / len(tokenized_lines)

        # Build the base prompt.
        base_prompt = "Pick as many lines as you can from these poem lines:\n"
@@ -506,12 +531,14 @@ def _sample_loaded_data(self, num_requests: int) -> list:
        # Convert the dataframe to a list of lists.
        return data.values.tolist()

-    def sample(self,
-               tokenizer: PreTrainedTokenizerBase,
-               num_requests: int,
-               max_loras: Optional[int] = None,
-               lora_path: Optional[str] = None,
-               **kwargs) -> list[SampleRequest]:
+    def sample(
+        self,
+        tokenizer: PreTrainedTokenizerBase,
+        num_requests: int,
+        max_loras: Optional[int] = None,
+        lora_path: Optional[str] = None,
+        **kwargs,
+    ) -> list[SampleRequest]:
        samples = []
        data = self._sample_loaded_data(num_requests=num_requests)
        for i in range(num_requests):
@@ -544,7 +571,6 @@ class HuggingFaceDataset(BenchmarkDataset):
    Dataset class for processing a HuggingFace dataset with conversation data
    and optional images.
    """
-    DEFAULT_NUM_REQUESTS = 1000

    def __init__(
        self,
@@ -618,6 +644,7 @@ def sample(self,
                expected_output_len=output_len,
                multi_modal_data=mm_content,
            ))
+        self.maybe_oversample_requests(sampled_requests, num_requests)
        return sampled_requests

@@ -632,7 +659,6 @@ class VisionArenaDataset(HuggingFaceDataset):
    """

    DEFAULT_OUTPUT_LEN = 128
-    DEFAULT_NUM_REQUESTS = 1000
    VISION_ARENA_DATASET_PATH = "lmarena-ai/vision-arena-bench-v0.1"

    def __init__(
@@ -657,12 +683,14 @@ def load_data(self) -> None:
        )
        self.data = dataset.shuffle(seed=self.random_seed)

-    def sample(self,
-               tokenizer: PreTrainedTokenizerBase,
-               num_requests: int,
-               output_len: Optional[int] = None,
-               enable_multimodal_chat: bool = False,
-               **kwargs) -> list:
+    def sample(
+        self,
+        tokenizer: PreTrainedTokenizerBase,
+        num_requests: int,
+        output_len: Optional[int] = None,
+        enable_multimodal_chat: bool = False,
+        **kwargs,
+    ) -> list:
        output_len = (output_len
                      if output_len is not None else self.DEFAULT_OUTPUT_LEN)
        sampled_requests = []
@@ -685,4 +713,5 @@ def sample(self,
                expected_output_len=output_len,
                multi_modal_data=mm_content,
            ))
+        self.maybe_oversample_requests(sampled_requests, num_requests)
        return sampled_requests
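Taken together, the pattern this commit establishes is: each concrete `sample()` collects at most `num_requests` entries from its data, then calls `maybe_oversample_requests` before returning. A minimal subclass sketch of that contract against the diffed classes (the inline data, the `prompt`/`prompt_len` field names on `SampleRequest`, and `InlineDataset` itself are illustrative assumptions):

```python
class InlineDataset(BenchmarkDataset):
    """Hypothetical dataset that keeps its data in memory."""

    def load_data(self) -> None:
        self.data = ["hello", "world"]

    def sample(self, tokenizer, num_requests: int, **kwargs) -> list:
        samples = [
            SampleRequest(prompt=p, prompt_len=1, expected_output_len=8)
            for p in self.data[:num_requests]
        ]
        # Pad with repeats whenever the dataset is smaller than the target.
        self.maybe_oversample_requests(samples, num_requests)
        return samples
```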