#!/bin/bash
# TODO: parameterize. This works for now.
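# Assumes the Llama 2 7B checkpoint directory (llama-2-7b/) and tokenizer.model
# sit in the working directory alongside inference_benchmark.py.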

echo "Running inference benchmarks"

if [ ! -d "benchmark_outputs" ]; then
    echo "Creating benchmark_outputs directory"
    mkdir benchmark_outputs
fi
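
# Sweep DataLoader worker counts (0, 1, 2, 4, 8) against batch sizes
# (1, 16, 32, 128); each run's stdout is captured under benchmark_outputs/.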
for num_workers in 0 1 2 4 8; do
    for batch_size in 1 16 32 128; do
        echo "Batch size ${batch_size}, num workers ${num_workers}"
        torchrun inference_benchmark.py \
            --ckpt_dir llama-2-7b/ \
            --tokenizer_path tokenizer.model \
            --max_seq_len 512 \
            --max_batch_size 200 \
            --batch_size "${batch_size}" \
            --num_workers "${num_workers}" \
            > "benchmark_outputs/batch_size_${batch_size}_num_workers_${num_workers}.txt"
    done
done

echo "DONE. Exiting."