@@ -1491,10 +1491,10 @@ int main(int argc, char ** argv) {
1491
1491
llama_model * lmodel = nullptr ;
1492
1492
const cmd_params_instance * prev_inst = nullptr ;
1493
1493
1494
- int params_idx = 1 ;
1494
+ int params_idx = 0 ;
1495
1495
for (const auto & inst : params_instances) {
1496
- LOG_TEE (" llama-bench: starting benchmark %d/%ld\n " , params_idx, params_instances.size ());
1497
1496
params_idx ++;
1497
+ LOG_TEE (" llama-bench: benchmark %d/%ld: starting\n " , params_idx, params_instances.size ());
1498
1498
// keep the same model between tests when possible
1499
1499
if (!lmodel || !prev_inst || !inst.equal_mparams (*prev_inst)) {
1500
1500
if (lmodel) {
@@ -1544,10 +1544,12 @@ int main(int argc, char ** argv) {
1544
1544
1545
1545
// warmup run
1546
1546
if (t.n_prompt > 0 ) {
1547
+ LOG_TEE (" llama-bench: benchmark %d/%ld: warmup prompt run\n " , params_idx, params_instances.size ());
1547
1548
// test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
1548
1549
test_prompt (ctx, t.n_prompt , 0 , t.n_batch , t.n_threads );
1549
1550
}
1550
1551
if (t.n_gen > 0 ) {
1552
+ LOG_TEE (" llama-bench: benchmark %d/%ld: warmup generation run\n " , params_idx, params_instances.size ());
1551
1553
test_gen (ctx, 1 , 0 , t.n_threads );
1552
1554
}
1553
1555
@@ -1557,9 +1559,11 @@ int main(int argc, char ** argv) {
1557
1559
uint64_t t_start = get_time_ns ();
1558
1560
1559
1561
if (t.n_prompt > 0 ) {
1562
+ LOG_TEE (" llama-bench: benchmark %d/%ld: prompt run %d/%d\n " , params_idx, params_instances.size (), i + 1 , params.reps );
1560
1563
test_prompt (ctx, t.n_prompt , 0 , t.n_batch , t.n_threads );
1561
1564
}
1562
1565
if (t.n_gen > 0 ) {
1566
+ LOG_TEE (" llama-bench: benchmark %d/%ld: generation run %d/%d\n " , params_idx, params_instances.size (), i + 1 , params.reps );
1563
1567
test_gen (ctx, t.n_gen , t.n_prompt , t.n_threads );
1564
1568
}
1565
1569
0 commit comments