Skip to content

Commit d0a2e4b

Browse files
committed
update
1 parent ae03a08 commit d0a2e4b

File tree

1 file changed

+9
-10
lines changed

1 file changed

+9
-10
lines changed

examples/sycl/09_bmg_gemm_int4_streamk/09_bmg_gemm_int4_streamk.cpp

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -29,9 +29,9 @@
2929
*
3030
**************************************************************************************************/
3131
/***************************************
32-
* Mixed Precision PVC Gemm Example For int4_t (RowMajor A) x (ColumnMajor B)
32+
* Mixed Precision BMG Gemm Example For int4_t (RowMajor A) x (ColumnMajor B)
3333
*
34-
* This example demonstrates how to dispatch a mixed precision GEMM on PVC, with optional dequantization.
34+
* This example demonstrates how to dispatch a mixed precision GEMM on BMG, with optional dequantization.
3535
* The GemmMode enum describes the 3 modes of operation:
3636
*
3737
* Note: due to a bug in the IGC compiler, it's currently necessary to build this example with the following
@@ -117,15 +117,15 @@ struct Options {
117117
cmd.get_cmd_line_argument("warmup", warmup, 0);
118118
cmd.get_cmd_line_argument("flush_cache", flush_cache, 0);
119119
cmd.get_cmd_line_argument("cache_cnt", cache_cnt, 3);
120-
cmd.get_cmd_line_argument("l3_cache", l3_cache, 192);
120+
cmd.get_cmd_line_argument("l3_cache", l3_cache, 32);
121121
cmd.get_cmd_line_argument("splits", splits, 2);
122122
cmd.get_cmd_line_argument("splitk", splitk, true);
123123
}
124124

125125
/// Prints the usage statement.
126126
std::ostream & print_usage(std::ostream &out) const {
127127

128-
out << "PVC int4_t StreamK GEMM Mixed Type Example\n\n"
128+
out << "BMG int4_t StreamK GEMM Mixed Type Example\n\n"
129129
<< "Options:\n\n"
130130
<< " --help If specified, displays this usage statement\n\n"
131131
<< " --dp If specified, uses Data Parallel decomposition\n"
@@ -507,17 +507,16 @@ return true;
507507

508508
float total_time = 0.f;
509509
if (options.warmup >= options.iterations) {
510-
return cutlass::Status::kErrorInternal;
510+
return cutlass::Status::kSuccess;
511511
}
512512

513513
double tflops = (2.0 * options.m * options.n * options.k * options.l) * 1e-12;
514514
double hbm = (sizeof_bits_v<ElementA> * options.m * options.k / 8 +
515515
sizeof_bits_v<ElementB> * options.k * options.n / 8 +
516516
sizeof_bits_v<ElementOutput> * options.m * options.n / 8) * 1e-9;
517517

518-
std::cout << "\nProblem Size: " << options.m << 'x' << options.n << 'x' << options.k << 'x' << options.l << std::endl;
519-
printf("--l=%d --iterations=%d --flush_cache=%d\n", options.l, options.iterations, options.flush_cache);
520-
printf("--warmup=%d, --cache_cnt=%d, --l3_cache_size=%d\n\n", options.warmup, options.cache_cnt, l3_cache_size);
518+
std::cout << "Problem Size: " << options.m << 'x' << options.n << 'x' << options.k << 'x' << options.l << ", splitk: " << options.splits << std::endl;
519+
// printf(" --iterations=%d --flush_cache=%d, --warmup=%d, --l3_cache_size=%dMB\n", options.iterations, options.flush_cache, options.warmup, options.l3_cache);
521520

522521
if (options.iterations > 0) {
523522
for (int i = 0; i < options.iterations; ++i) {
@@ -556,12 +555,12 @@ return true;
556555
total_time += ctime;
557556
}
558557

559-
printf("Cutlass GEMM Performance [%d]: [%4.3f]TFlop/s [%4.3f]GB/s (%6.4f)ms\n", i, tflops / ctime, hbm / ctime, ctime*1000);
558+
// printf("Cutlass GEMM Performance [%d]: [%4.3f]TFlop/s [%4.3f]GB/s (%6.4f)ms\n", i, tflops / ctime, hbm / ctime, ctime*1000);
560559
}
561560

562561
float cute_time = total_time / (options.iterations - options.warmup);
563562

564-
printf("Cutlass GEMM Performance average: [%4.3f]TFlop/s [%4.3f]GB/s (%6.4f)ms\n", tflops / cute_time, hbm / cute_time, cute_time*1000);
563+
// printf("Cutlass GEMM Performance average: [%4.3f]TFlop/s [%4.3f]GB/s (%6.4f)ms\n", tflops / cute_time, hbm / cute_time, cute_time*1000);
565564
}
566565

567566
return cutlass::Status::kSuccess;

0 commit comments

Comments
 (0)