#!/bin/bash
# TODO: parameterize. This works for now.
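# Assumes the Llama 2 7B checkpoint directory (llama-2-7b/) and tokenizer.model
# sit in the working directory alongside inference_benchmark.py.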

echo "Running inference benchmarks"

if [ ! -d "benchmark_outputs" ]; then
    echo "Creating benchmark_outputs directory"
    mkdir benchmark_outputs
fi
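
# Sweep DataLoader worker counts (0, 1, 2, 4, 8) against batch sizes
# (1, 16, 32, 128); each run's stdout is captured under benchmark_outputs/.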
for num_workers in 0 1 2 4 8; do
    for batch_size in 1 16 32 128; do
        echo "Batch size ${batch_size}, num workers ${num_workers}"
        torchrun inference_benchmark.py \
            --ckpt_dir llama-2-7b/ \
            --tokenizer_path tokenizer.model \
            --max_seq_len 512 \
            --max_batch_size 200 \
            --batch_size "${batch_size}" \
            --num_workers "${num_workers}" \
            > "benchmark_outputs/batch_size_${batch_size}_num_workers_${num_workers}.txt"
    done
done

echo "DONE. Exiting."