#!/bin/bash
#
# Print the model name, max context size, and average tokens per second from a
# llama-server log file:
#
# llama_server_model_ctx_and_aver_tokens_per_sec.sh <log-file>

set -euo pipefail

# Require exactly one readable log file argument; fail early with a usage
# message instead of letting grep emit confusing errors on an empty filename.
if [[ $# -ne 1 || ! -r "$1" ]]; then
  printf 'Usage: %s <llama-server-log-file>\n' "${0##*/}" >&2
  exit 2
fi

log_file=$1
# Absolute directory containing this script, so the sibling Python helper is
# found regardless of the caller's working directory.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly log_file script_dir

# Model name line, e.g. "general.name = ...". The dot is escaped so it
# matches literally; under 'set -e' a missing line aborts with non-zero status.
grep -- "general\.name *=" "${log_file}"

# Maximum context size line, e.g. "llama_context: n_ctx = 32768".
grep -- "llama_context: n_ctx " "${log_file}"

# Feed the per-request "eval time" lines to the helper that averages tokens/s.
grep -- " eval time" "${log_file}" \
  | "${script_dir}/llama_server_average_tokens_per_sec.py"