
Commit adc9ff3

llama-bench : allow using a different printer for stderr with -oe (#7722)
compare-commits.sh : hide stdout, use -oe to print markdown
1 parent 987d743 commit adc9ff3

2 files changed: 101 additions & 60 deletions

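The new -oe flag selects an output format for stderr, independent of the stdout format chosen with -o (default: none, i.e. no stderr output). A minimal sketch of the intended use, assuming an illustrative model path: pipe the machine-readable SQL on stdout into sqlite3 while the markdown table on stderr stays visible in the terminal.

# sketch: the model path is illustrative
./llama-bench -m models/7B/ggml-model-q4_0.gguf -o sql -oe md | sqlite3 llama-bench.sqlite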

examples/llama-bench/llama-bench.cpp

Lines changed: 92 additions & 53 deletions
@@ -140,10 +140,11 @@ static std::string get_gpu_info() {
 }

 // command line params
-enum output_formats {CSV, JSON, MARKDOWN, SQL};
+enum output_formats {NONE, CSV, JSON, MARKDOWN, SQL};

 static const char * output_format_str(output_formats format) {
     switch (format) {
+        case NONE:     return "none";
         case CSV:      return "csv";
         case JSON:     return "json";
         case MARKDOWN: return "md";
@@ -152,6 +153,23 @@ static const char * output_format_str(output_formats format) {
     }
 }

+static bool output_format_from_str(const std::string & s, output_formats & format) {
+    if (s == "none") {
+        format = NONE;
+    } else if (s == "csv") {
+        format = CSV;
+    } else if (s == "json") {
+        format = JSON;
+    } else if (s == "md") {
+        format = MARKDOWN;
+    } else if (s == "sql") {
+        format = SQL;
+    } else {
+        return false;
+    }
+    return true;
+}
+
 static const char * split_mode_str(llama_split_mode mode) {
     switch (mode) {
         case LLAMA_SPLIT_MODE_NONE: return "none";
@@ -190,31 +208,33 @@ struct cmd_params {
     int reps;
     bool verbose;
     output_formats output_format;
+    output_formats output_format_stderr;
 };

 static const cmd_params cmd_params_defaults = {
-    /* model         */ {"models/7B/ggml-model-q4_0.gguf"},
-    /* n_prompt      */ {512},
-    /* n_gen         */ {128},
-    /* n_pg          */ {},
-    /* n_batch       */ {2048},
-    /* n_ubatch      */ {512},
-    /* type_k        */ {GGML_TYPE_F16},
-    /* type_v        */ {GGML_TYPE_F16},
-    /* n_threads     */ {cpu_get_num_math()},
-    /* n_gpu_layers  */ {99},
-    /* rpc_servers   */ {""},
-    /* split_mode    */ {LLAMA_SPLIT_MODE_LAYER},
-    /* main_gpu      */ {0},
-    /* no_kv_offload */ {false},
-    /* flash_attn    */ {false},
-    /* tensor_split  */ {std::vector<float>(llama_max_devices(), 0.0f)},
-    /* use_mmap      */ {true},
-    /* embeddings    */ {false},
-    /* numa          */ GGML_NUMA_STRATEGY_DISABLED,
-    /* reps          */ 5,
-    /* verbose       */ false,
-    /* output_format */ MARKDOWN
+    /* model                */ {"models/7B/ggml-model-q4_0.gguf"},
+    /* n_prompt             */ {512},
+    /* n_gen                */ {128},
+    /* n_pg                 */ {},
+    /* n_batch              */ {2048},
+    /* n_ubatch             */ {512},
+    /* type_k               */ {GGML_TYPE_F16},
+    /* type_v               */ {GGML_TYPE_F16},
+    /* n_threads            */ {cpu_get_num_math()},
+    /* n_gpu_layers         */ {99},
+    /* rpc_servers          */ {""},
+    /* split_mode           */ {LLAMA_SPLIT_MODE_LAYER},
+    /* main_gpu             */ {0},
+    /* no_kv_offload        */ {false},
+    /* flash_attn           */ {false},
+    /* tensor_split         */ {std::vector<float>(llama_max_devices(), 0.0f)},
+    /* use_mmap             */ {true},
+    /* embeddings           */ {false},
+    /* numa                 */ GGML_NUMA_STRATEGY_DISABLED,
+    /* reps                 */ 5,
+    /* verbose              */ false,
+    /* output_format        */ MARKDOWN,
+    /* output_format_stderr */ NONE,
 };

 static void print_usage(int /* argc */, char ** argv) {
@@ -243,6 +263,7 @@ static void print_usage(int /* argc */, char ** argv) {
     printf("  -ts, --tensor-split <ts0/ts1/..>    (default: 0)\n");
     printf("  -r, --repetitions <n>               (default: %d)\n", cmd_params_defaults.reps);
     printf("  -o, --output <csv|json|md|sql>      (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
+    printf("  -oe, --output-err <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
     printf("  -v, --verbose                       (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
     printf("\n");
     printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n");
@@ -284,6 +305,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {

     params.verbose = cmd_params_defaults.verbose;
     params.output_format = cmd_params_defaults.output_format;
+    params.output_format_stderr = cmd_params_defaults.output_format_stderr;
     params.reps = cmd_params_defaults.reps;

     for (int i = 1; i < argc; i++) {
@@ -493,18 +515,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
                 invalid_param = true;
                 break;
             }
-            if (argv[i] == std::string("csv")) {
-                params.output_format = CSV;
-            } else if (argv[i] == std::string("json")) {
-                params.output_format = JSON;
-            } else if (argv[i] == std::string("md")) {
-                params.output_format = MARKDOWN;
-            } else if (argv[i] == std::string("sql")) {
-                params.output_format = SQL;
-            } else {
+            invalid_param = !output_format_from_str(argv[i], params.output_format);
+        } else if (arg == "-oe" || arg == "--output-err") {
+            if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
+            invalid_param = !output_format_from_str(argv[i], params.output_format_stderr);
         } else if (arg == "-v" || arg == "--verbose") {
             params.verbose = true;
         } else {
@@ -1278,6 +1295,22 @@ static void llama_null_log_callback(enum ggml_log_level level, const char * text
     (void) user_data;
 }

+static std::unique_ptr<printer> create_printer(output_formats format) {
+    switch (format) {
+        case NONE:
+            return nullptr;
+        case CSV:
+            return std::unique_ptr<printer>(new csv_printer());
+        case JSON:
+            return std::unique_ptr<printer>(new json_printer());
+        case MARKDOWN:
+            return std::unique_ptr<printer>(new markdown_printer());
+        case SQL:
+            return std::unique_ptr<printer>(new sql_printer());
+    }
+    GGML_ASSERT(false);
+}
+
 int main(int argc, char ** argv) {
     // try to set locale for unicode characters in markdown
     setlocale(LC_CTYPE, ".UTF-8");
@@ -1304,26 +1337,18 @@ int main(int argc, char ** argv) {
     llama_numa_init(params.numa);

     // initialize printer
-    std::unique_ptr<printer> p;
-    switch (params.output_format) {
-        case CSV:
-            p.reset(new csv_printer());
-            break;
-        case JSON:
-            p.reset(new json_printer());
-            break;
-        case MARKDOWN:
-            p.reset(new markdown_printer());
-            break;
-        case SQL:
-            p.reset(new sql_printer());
-            break;
-        default:
-            assert(false);
-            exit(1);
+    std::unique_ptr<printer> p     = create_printer(params.output_format);
+    std::unique_ptr<printer> p_err = create_printer(params.output_format_stderr);
+
+    if (p) {
+        p->fout = stdout;
+        p->print_header(params);
+    }
+
+    if (p_err) {
+        p_err->fout = stderr;
+        p_err->print_header(params);
     }
-    p->fout = stdout;
-    p->print_header(params);

     std::vector<cmd_params_instance> params_instances = get_cmd_params_instances(params);

@@ -1381,7 +1406,15 @@
                 t.samples_ns.push_back(t_ns);
             }

-            p->print_test(t);
+            if (p) {
+                p->print_test(t);
+                fflush(p->fout);
+            }
+
+            if (p_err) {
+                p_err->print_test(t);
+                fflush(p_err->fout);
+            }

             llama_print_timings(ctx);

@@ -1390,7 +1423,13 @@

     llama_free_model(lmodel);

-    p->print_footer();
+    if (p) {
+        p->print_footer();
+    }
+
+    if (p_err) {
+        p_err->print_footer();
+    }

     llama_backend_free();
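Because the two printers write to separate streams, each can be captured independently. A sketch, with an illustrative output file name: save CSV results to disk while following the markdown table live on stderr.

# sketch: results.csv is an illustrative file name
./llama-bench -o csv -oe md > results.csv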

scripts/compare-commits.sh

Lines changed: 9 additions & 7 deletions
@@ -10,16 +10,18 @@ set -x

 bench_args="${@:3}"

-rm -f llama-bench.sqlite
+rm -f llama-bench.sqlite > /dev/null

 # to test a backend, call the script with the corresponding environment variable (e.g. LLAMA_CUDA=1 ./scripts/compare-commits.sh ...)

-git checkout $1
-make clean && make -j32 $make_opts llama-bench
-./llama-bench -o sql $bench_args | tee /dev/tty | sqlite3 llama-bench.sqlite
+git checkout $1 > /dev/null
+make clean > /dev/null
+make -j$(nproc) $make_opts llama-bench > /dev/null
+./llama-bench -o sql -oe md $bench_args | sqlite3 llama-bench.sqlite

-git checkout $2
-make clean && make -j32 $make_opts llama-bench
-./llama-bench -o sql $bench_args | tee /dev/tty | sqlite3 llama-bench.sqlite
+git checkout $2 > /dev/null
+make clean > /dev/null
+make -j$(nproc) $make_opts llama-bench > /dev/null
+./llama-bench -o sql -oe md $bench_args | sqlite3 llama-bench.sqlite

 ./scripts/compare-llama-bench.py -b $1 -c $2
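With checkout and build noise suppressed and stdout consumed by sqlite3, the progress shown in the terminal is now the markdown table that llama-bench prints on stderr. A sketch of an invocation, per the comment in the script; the two positional arguments are the commits to compare and any further arguments are forwarded to llama-bench.

# sketch: commit names and the extra llama-bench argument are illustrative
LLAMA_CUDA=1 ./scripts/compare-commits.sh master my-branch -m models/7B/ggml-model-q4_0.gguf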
