@@ -140,10 +140,11 @@ static std::string get_gpu_info() {
140
140
}
141
141
142
142
// command line params
143
// Supported formats for benchmark-result output; NONE suppresses printing entirely.
enum output_formats {NONE, CSV, JSON, MARKDOWN, SQL};
144
144
145
145
static const char * output_format_str (output_formats format) {
146
146
switch (format) {
147
+ case NONE: return " none" ;
147
148
case CSV: return " csv" ;
148
149
case JSON: return " json" ;
149
150
case MARKDOWN: return " md" ;
@@ -152,6 +153,23 @@ static const char * output_format_str(output_formats format) {
152
153
}
153
154
}
154
155
156
+ static bool output_format_from_str (const std::string & s, output_formats & format) {
157
+ if (s == " none" ) {
158
+ format = NONE;
159
+ } else if (s == " csv" ) {
160
+ format = CSV;
161
+ } else if (s == " json" ) {
162
+ format = JSON;
163
+ } else if (s == " md" ) {
164
+ format = MARKDOWN;
165
+ } else if (s == " sql" ) {
166
+ format = SQL;
167
+ } else {
168
+ return false ;
169
+ }
170
+ return true ;
171
+ }
172
+
155
173
static const char * split_mode_str (llama_split_mode mode) {
156
174
switch (mode) {
157
175
case LLAMA_SPLIT_MODE_NONE: return " none" ;
@@ -190,31 +208,33 @@ struct cmd_params {
190
208
int reps;
191
209
bool verbose;
192
210
output_formats output_format;
211
+ output_formats output_format_stderr;
193
212
};
194
213
195
214
// Default values for every cmd_params field, listed in declaration order
// (each /* name */ comment labels the field being initialized).
static const cmd_params cmd_params_defaults = {
    /* model                */ {"models/7B/ggml-model-q4_0.gguf"},
    /* n_prompt             */ {512},
    /* n_gen                */ {128},
    /* n_pg                 */ {},
    /* n_batch              */ {2048},
    /* n_ubatch             */ {512},
    /* type_k               */ {GGML_TYPE_F16},
    /* type_v               */ {GGML_TYPE_F16},
    /* n_threads            */ {cpu_get_num_math()},
    /* n_gpu_layers         */ {99},
    /* rpc_servers          */ {""},
    /* split_mode           */ {LLAMA_SPLIT_MODE_LAYER},
    /* main_gpu             */ {0},
    /* no_kv_offload        */ {false},
    /* flash_attn           */ {false},
    /* tensor_split         */ {std::vector<float>(llama_max_devices(), 0.0f)},
    /* use_mmap             */ {true},
    /* embeddings           */ {false},
    /* numa                 */ GGML_NUMA_STRATEGY_DISABLED,
    /* reps                 */ 5,
    /* verbose              */ false,
    /* output_format        */ MARKDOWN,
    /* output_format_stderr */ NONE,
};
219
239
220
240
static void print_usage (int /* argc */ , char ** argv) {
@@ -243,6 +263,7 @@ static void print_usage(int /* argc */, char ** argv) {
243
263
printf (" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n " );
244
264
printf (" -r, --repetitions <n> (default: %d)\n " , cmd_params_defaults.reps );
245
265
printf (" -o, --output <csv|json|md|sql> (default: %s)\n " , output_format_str (cmd_params_defaults.output_format ));
266
+ printf (" -oe, --output-err <csv|json|md|sql> (default: %s)\n " , output_format_str (cmd_params_defaults.output_format_stderr ));
246
267
printf (" -v, --verbose (default: %s)\n " , cmd_params_defaults.verbose ? " 1" : " 0" );
247
268
printf (" \n " );
248
269
printf (" Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n " );
@@ -284,6 +305,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
284
305
285
306
params.verbose = cmd_params_defaults.verbose ;
286
307
params.output_format = cmd_params_defaults.output_format ;
308
+ params.output_format_stderr = cmd_params_defaults.output_format_stderr ;
287
309
params.reps = cmd_params_defaults.reps ;
288
310
289
311
for (int i = 1 ; i < argc; i++) {
@@ -493,18 +515,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
493
515
invalid_param = true ;
494
516
break ;
495
517
}
496
- if (argv[i] == std::string (" csv" )) {
497
- params.output_format = CSV;
498
- } else if (argv[i] == std::string (" json" )) {
499
- params.output_format = JSON;
500
- } else if (argv[i] == std::string (" md" )) {
501
- params.output_format = MARKDOWN;
502
- } else if (argv[i] == std::string (" sql" )) {
503
- params.output_format = SQL;
504
- } else {
518
+ invalid_param = !output_format_from_str (argv[i], params.output_format );
519
+ } else if (arg == " -oe" || arg == " --output-err" ) {
520
+ if (++i >= argc) {
505
521
invalid_param = true ;
506
522
break ;
507
523
}
524
+ invalid_param = !output_format_from_str (argv[i], params.output_format_stderr );
508
525
} else if (arg == " -v" || arg == " --verbose" ) {
509
526
params.verbose = true ;
510
527
} else {
@@ -1278,6 +1295,22 @@ static void llama_null_log_callback(enum ggml_log_level level, const char * text
1278
1295
(void ) user_data;
1279
1296
}
1280
1297
1298
// Construct the printer implementation for the requested output format.
// Returns nullptr for NONE (output disabled); every valid enum value returns
// from inside the switch, so reaching the trailing assert means `format`
// held a value outside the enum.
static std::unique_ptr<printer> create_printer(output_formats format) {
    switch (format) {
        case NONE:
            return nullptr;
        case CSV:
            return std::unique_ptr<printer>(new csv_printer());
        case JSON:
            return std::unique_ptr<printer>(new json_printer());
        case MARKDOWN:
            return std::unique_ptr<printer>(new markdown_printer());
        case SQL:
            return std::unique_ptr<printer>(new sql_printer());
    }
    GGML_ASSERT(false);
}
1281
1314
int main (int argc, char ** argv) {
1282
1315
// try to set locale for unicode characters in markdown
1283
1316
setlocale (LC_CTYPE, " .UTF-8" );
@@ -1304,26 +1337,18 @@ int main(int argc, char ** argv) {
1304
1337
llama_numa_init (params.numa );
1305
1338
1306
1339
// initialize printer
1307
- std::unique_ptr<printer> p;
1308
- switch (params.output_format ) {
1309
- case CSV:
1310
- p.reset (new csv_printer ());
1311
- break ;
1312
- case JSON:
1313
- p.reset (new json_printer ());
1314
- break ;
1315
- case MARKDOWN:
1316
- p.reset (new markdown_printer ());
1317
- break ;
1318
- case SQL:
1319
- p.reset (new sql_printer ());
1320
- break ;
1321
- default :
1322
- assert (false );
1323
- exit (1 );
1340
+ std::unique_ptr<printer> p = create_printer (params.output_format );
1341
+ std::unique_ptr<printer> p_err = create_printer (params.output_format_stderr );
1342
+
1343
+ if (p) {
1344
+ p->fout = stdout;
1345
+ p->print_header (params);
1346
+ }
1347
+
1348
+ if (p_err) {
1349
+ p_err->fout = stderr;
1350
+ p_err->print_header (params);
1324
1351
}
1325
- p->fout = stdout;
1326
- p->print_header (params);
1327
1352
1328
1353
std::vector<cmd_params_instance> params_instances = get_cmd_params_instances (params);
1329
1354
@@ -1381,7 +1406,15 @@ int main(int argc, char ** argv) {
1381
1406
t.samples_ns .push_back (t_ns);
1382
1407
}
1383
1408
1384
- p->print_test (t);
1409
+ if (p) {
1410
+ p->print_test (t);
1411
+ fflush (p->fout );
1412
+ }
1413
+
1414
+ if (p_err) {
1415
+ p_err->print_test (t);
1416
+ fflush (p_err->fout );
1417
+ }
1385
1418
1386
1419
llama_print_timings (ctx);
1387
1420
@@ -1390,7 +1423,13 @@ int main(int argc, char ** argv) {
1390
1423
1391
1424
llama_free_model (lmodel);
1392
1425
1393
- p->print_footer ();
1426
+ if (p) {
1427
+ p->print_footer ();
1428
+ }
1429
+
1430
+ if (p_err) {
1431
+ p_err->print_footer ();
1432
+ }
1394
1433
1395
1434
llama_backend_free ();
1396
1435
0 commit comments