Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 28 additions & 8 deletions ggml/src/ggml-opencl/ggml-opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,11 +619,10 @@ struct ggml_backend_opencl_context {
cl_kernel kernel_mul_mm_iq4_nl_f32_l4_lm;

std::vector<ProfilingInfo> profiling_info;
std::vector<ProfilingInfo> profiling_results;

void write_profiling_info() {
FILE * fperf = fopen("cl_profiling.csv", "w");
if (!fperf) {
GGML_LOG_ERROR("Failed to open cl_profiling.csv\n");
void flush_profiling_batch() {
if (profiling_info.empty()) {
return;
}

Expand All @@ -647,6 +646,7 @@ struct ggml_backend_opencl_context {
CL_CHECK(clGetEventProfilingInfo(
info.evt, CL_PROFILING_COMMAND_COMPLETE, sizeof(cl_ulong), &cmd_complete, NULL));
CL_CHECK(clReleaseEvent(info.evt));
info.evt = nullptr;

char kernel_name[512];
CL_CHECK(clGetKernelInfo(info.kernel, CL_KERNEL_FUNCTION_NAME,
Expand All @@ -664,10 +664,26 @@ struct ggml_backend_opencl_context {
info.cmd_complete_duration_ns = cmd_complete - cmd_end;
info.cmd_total_duration_ns = cmd_complete - cmd_queued;
}
profiling_results.insert(profiling_results.end(),
std::make_move_iterator(profiling_info.begin()),
std::make_move_iterator(profiling_info.end()));
profiling_info.clear();
}

void write_profiling_info() {
if (profiling_results.empty()) {
return;
}

// Dump a csv
FILE * fperf = fopen("cl_profiling.csv", "w");
if (!fperf) {
GGML_LOG_ERROR("Failed to open cl_profiling.csv\n");
return;
}

fprintf(fperf, "op name, kernel name, exec duration (ms), global size, local size, output size\n");
for (const ProfilingInfo & info : profiling_info) {
for (const ProfilingInfo & info : profiling_results) {
fprintf(fperf, "%s,%s,%f,%zux%zux%zu,%zux%zux%zu,%zux%zux%zux%zu\n",
info.op_name.c_str(), info.kernel_name.c_str(),
info.cmd_duration_ns/1.e6f,
Expand All @@ -678,14 +694,14 @@ struct ggml_backend_opencl_context {
fclose(fperf);

// Dump a simple chrome trace
FILE* ftrace = fopen("cl_trace.json", "w");
FILE * ftrace = fopen("cl_trace.json", "w");
if (!ftrace) {
GGML_LOG_ERROR("Failed to open cl_trace.json\n");
return;
}

fprintf(ftrace, "[\n");
for (const ProfilingInfo & info : profiling_info) {
for (const ProfilingInfo & info : profiling_results) {
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Host\"},\n",
info.kernel_name.c_str(), info.cmd_queued/1000);
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Host\"},\n",
Expand All @@ -696,6 +712,7 @@ struct ggml_backend_opencl_context {
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Device\"},\n",
info.kernel_name.c_str(), info.cmd_end/1000);
}
fprintf(ftrace, "]\n");
fclose(ftrace);
}

Expand All @@ -716,6 +733,9 @@ struct ggml_backend_opencl_context {

profiling_info.emplace_back();
populateProfilingInfo(profiling_info.back(), evt, kernel, work_dim, global_work_size, local_work_size, tensor);
if (profiling_info.size() >= 2048) {
flush_profiling_batch();
}
#else
GGML_UNUSED(tensor);
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, work_dim, NULL, global_work_size, local_work_size, 0, NULL, NULL));
Expand Down Expand Up @@ -760,7 +780,7 @@ struct ggml_backend_opencl_context {
if (ref_count == 0) {
#ifdef GGML_OPENCL_PROFILING
write_profiling_info();
profiling_info.clear();
profiling_results.clear();
#endif
}
}
Expand Down
Loading