Skip to content

Commit 8a03f7f

Browse files
committed
QueryInternal perf
Signed-off-by: Gangmuk Lim <gangmuk@gmail.com>
1 parent f51c9d8 commit 8a03f7f

1 file changed

Lines changed: 6 additions & 6 deletions

File tree

modules/llm-cache/storage/aibrix_blob_storage.cc

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -495,7 +495,6 @@ Status AIBrixBlobStorage::QueryInternal(
495495
if (it->second.chunk_builder == nullptr) {
496496
VLOG(100) << "Loading " << obj_name;
497497
VINEYARD_ASSERT(it->second.object_id != InvalidObjectID());
498-
auto load_start = std::chrono::system_clock::now();
499498
auto status = KVCacheChunkBuilder::Make(
500499
it->second.chunk_builder, rpc_client_, tensor_nbytes_, layer_,
501500
chunk_size_, kv_cache_ns_, it->second.object_id);
@@ -504,10 +503,8 @@ Status AIBrixBlobStorage::QueryInternal(
504503
// skip this and rest chunks
505504
break;
506505
} else {
507-
auto load_end = std::chrono::system_clock::now();
508-
auto load_time_in_ms = std::chrono::duration_cast<std::chrono::milliseconds>(load_end - load_start).count();
509506
VLOG(100) << "obj name=" << obj_name
510-
<< ", obj id=" << ObjectIDToString(it->second.object_id) << ", load latency " << load_time_in_ms << " ms";
507+
<< ", obj id=" << ObjectIDToString(it->second.object_id);
511508
}
512509
}
513510
it->second.access_bit = true;
@@ -532,9 +529,12 @@ Status AIBrixBlobStorage::QueryInternal(
532529
Status first_error = Status::OK();
533530
WAIT_TASK_RESULTS(tids, matched, first_error, obj_names);
534531
auto query_end_time = std::chrono::system_clock::now();
532+
auto hit_ratio = matched / static_cast<float>(tokens.size());
535533
auto total_time_in_ms = std::chrono::duration_cast<std::chrono::milliseconds>(query_end_time - query_start_time).count();
536-
VLOG(100) << "Query total latency " << total_time_in_ms << " ms";
537-
VLOG(100) << "Cache Hit Ratio: " << (matched / static_cast<float>(tokens.size())) * 100.0f << "%";
534+
size_t total_kv_tensors_size = tokens.size() * tensor_nbytes_ * 2 * layer_;
535+
VLOG(100) << "QueryInternalPerf,Query_start_time," << query_end_time << ",Query_end_time," << query_end_time << "," << total_time_in_ms << ",threads," << thread_count
536+
<< ",tokens," << tokens.size() << ",matched," << matched << ",hit_ratio," << hit_ratio
537+
<< ",num_objects," << obj_names.size() << ",total_kv_tensors_size," << kv_tensors.size();
538538
return first_error;
539539
}
540540

0 commit comments

Comments
 (0)