
Commit 93d16e2

Merge pull request #3 from gtamer2/new_benchmarks
Script to run benchmarks
2 parents b988f0e + 078aa9c commit 93d16e2

2 files changed: +63 -5 lines changed

inference_benchmark.py

Lines changed: 8 additions & 5 deletions
@@ -6,7 +6,7 @@
 from torch.profiler import profile, record_function, ProfilerActivity
 
 ### Setup ###
-BATCH_SIZE = 1
+BATCH_SIZE = 16
 BATCH_COUNT = 5
 NUM_WORKERS = 1
 PROFILE_MEMORY = True
@@ -28,11 +28,11 @@
 def get_device():
     return torch.device(DEVICE_CUDA if torch.cuda.is_available() else DEVICE_CPU)
 
-def get_data_loader(num_workers=1):
+def get_data_loader(num_workers, batch_size):
     dataset = load_dataset(HUGGING_FACE_GSMK_DATASET_ID, 'main')['train']
     dataloader = DataLoader(
         dataset,
-        batch_size=BATCH_SIZE,
+        batch_size=batch_size,
         shuffle=False,
         num_workers=num_workers
     )
@@ -125,11 +125,13 @@ def __get_next_batch(dataloader):
 def benchmark(ckpt_dir,
               tokenizer_path,
               max_seq_len,
-              max_batch_size):
+              max_batch_size,
+              batch_size=BATCH_SIZE,
+              num_workers=NUM_WORKERS):
     print("Starting up...")
 
     print("Building data loaders...")
-    data_loader = get_data_loader()
+    data_loader = get_data_loader(num_workers, batch_size)
 
     print("Initializing Model...")
     net = get_model(ckpt_dir, tokenizer_path, max_seq_len, max_batch_size)
@@ -164,4 +166,5 @@ def benchmark(ckpt_dir,
 
 
 if __name__ == "__main__":
+    torch.cuda.empty_cache()
     fire.Fire(benchmark)
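
Because benchmark is dispatched through fire.Fire, the two new keyword arguments surface directly as --batch_size and --num_workers command-line flags; the sweep script below relies on exactly that, one invocation per configuration:

    torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model \
        --max_seq_len 512 --max_batch_size 200 \
        --batch_size 16 --num_workers 2 \
        > benchmark_outputs/batch_size_16_num_workers_2.txt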

run_inference_benchmarks.sh

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
+# TODO: parameterize. this works for now.
+
+echo "Running inference benchmarks"
+
+if [ ! -d "benchmark_outputs" ]; then
+    echo "Creating benchmark_outputs directory"
+    mkdir benchmark_outputs
+fi
+
+echo "Batch size 1, num workers 0"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 1 --num_workers 0 > benchmark_outputs/batch_size_1_num_workers_0.txt
+echo "Batch size 16, num workers 0"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 16 --num_workers 0 > benchmark_outputs/batch_size_16_num_workers_0.txt
+echo "Batch size 32, num workers 0"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 32 --num_workers 0 > benchmark_outputs/batch_size_32_num_workers_0.txt
+echo "Batch size 128, num workers 0"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 128 --num_workers 0 > benchmark_outputs/batch_size_128_num_workers_0.txt
+
+echo "Batch size 1, num workers 1"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 1 --num_workers 1 > benchmark_outputs/batch_size_1_num_workers_1.txt
+echo "Batch size 16, num workers 1"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 16 --num_workers 1 > benchmark_outputs/batch_size_16_num_workers_1.txt
+echo "Batch size 32, num workers 1"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 32 --num_workers 1 > benchmark_outputs/batch_size_32_num_workers_1.txt
+echo "Batch size 128, num workers 1"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 128 --num_workers 1 > benchmark_outputs/batch_size_128_num_workers_1.txt
+
+echo "Batch size 1, num workers 2"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 1 --num_workers 2 > benchmark_outputs/batch_size_1_num_workers_2.txt
+echo "Batch size 16, num workers 2"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 16 --num_workers 2 > benchmark_outputs/batch_size_16_num_workers_2.txt
+echo "Batch size 32, num workers 2"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 32 --num_workers 2 > benchmark_outputs/batch_size_32_num_workers_2.txt
+echo "Batch size 128, num workers 2"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 128 --num_workers 2 > benchmark_outputs/batch_size_128_num_workers_2.txt
+
+echo "Batch size 1, num workers 4"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 1 --num_workers 4 > benchmark_outputs/batch_size_1_num_workers_4.txt
+echo "Batch size 16, num workers 4"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 16 --num_workers 4 > benchmark_outputs/batch_size_16_num_workers_4.txt
+echo "Batch size 32, num workers 4"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 32 --num_workers 4 > benchmark_outputs/batch_size_32_num_workers_4.txt
+echo "Batch size 128, num workers 4"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 128 --num_workers 4 > benchmark_outputs/batch_size_128_num_workers_4.txt
+
+echo "Batch size 1, num workers 8"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 1 --num_workers 8 > benchmark_outputs/batch_size_1_num_workers_8.txt
+echo "Batch size 16, num workers 8"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 16 --num_workers 8 > benchmark_outputs/batch_size_16_num_workers_8.txt
+echo "Batch size 32, num workers 8"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 32 --num_workers 8 > benchmark_outputs/batch_size_32_num_workers_8.txt
+echo "Batch size 128, num workers 8"
+torchrun inference_benchmark.py --ckpt_dir llama-2-7b/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 200 --batch_size 128 --num_workers 8 > benchmark_outputs/batch_size_128_num_workers_8.txt
+
+echo "DONE. Exiting."
