@@ -2,31 +2,31 @@ export CHECKPOINT_PATH=../../../checkpoints # path to checkpoints folder
2
2
3
3
4
4
export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf
5
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision torch.float32 --write_result benchmark_results.txt
6
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt
7
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --write_result benchmark_results.txt
8
- # # in readme
9
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt
10
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt
11
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt
12
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int4wo-64 --write_result benchmark_results.txt
5
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision torch.float32 --write_result benchmark_results.txt
6
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt
7
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --write_result benchmark_results.txt
8
+ # in readme
9
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt
10
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt
11
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt
12
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int4wo-64 --write_result benchmark_results.txt
13
13
python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --quantization autoquant --write_result benchmark_results.txt
14
14
15
15
export MODEL_REPO=meta-llama/Meta-Llama-3-8B
16
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision torch.float32 --write_result benchmark_results.txt
17
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt
18
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --write_result benchmark_results.txt
19
- # # in readme
20
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt
21
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt
22
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt
23
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int4wo-64 --write_result benchmark_results.txt
16
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --precision torch.float32 --write_result benchmark_results.txt
17
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --write_result benchmark_results.txt
18
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --write_result benchmark_results.txt
19
+ # in readme
20
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt
21
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8dq --write_result benchmark_results.txt
22
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int8wo --write_result benchmark_results.txt
23
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --quantization int4wo-64 --write_result benchmark_results.txt
24
24
python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --compile_prefill --quantization autoquant --write_result benchmark_results.txt
25
25
26
- # export MODEL_REPO=meta-llama/Meta-Llama-3-8B
27
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt
28
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt --kv_cache_quantization
29
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt --max_new_tokens 2048
30
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt --kv_cache_quantization --max_new_tokens 2048
31
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt --max_new_tokens 8192
32
- # python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt --kv_cache_quantization --max_new_tokens 8192
26
+ export MODEL_REPO=meta-llama/Meta-Llama-3-8B
27
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt
28
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt --kv_cache_quantization
29
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt --max_new_tokens 2048
30
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt --kv_cache_quantization --max_new_tokens 2048
31
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt --max_new_tokens 8192
32
+ python generate.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --compile --write_result benchmark_results.txt --kv_cache_quantization --max_new_tokens 8192
0 commit comments