Turn on prefill (Llama) shapes for the fp8_gemm_rowwise B200 dashboard

Dhruva Kaushal · facebook-github-bot · commit a2e866a84961 · 2025-05-01T15:29:00.000-07:00
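Summary: Minor change to select the correct shapes for b200 fp8_gemm_rowwise_prefill based on the args: use `gemm_shapes(addmm_args.prefill)` when `addmm_args.llama` is set, and fall back to `BUILDIN_SHAPES` otherwise.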

Reviewed By: minjang

Differential Revision: D73976271

fbshipit-source-id: 1b52c0e570554cd4d8774be3725af354cf69c603
diff --git a/tritonbench/operators/fp8_gemm_rowwise/operator.py b/tritonbench/operators/fp8_gemm_rowwise/operator.py
@@ -177,7 +177,11 @@ def __init__(
             self.use_tma = True
             self.no_use_persistent = False
             self.warp_specialization = False
-            self.shapes = BUILDIN_SHAPES
+            self.shapes = (
+                gemm_shapes(addmm_args.prefill)
+                if (addmm_args.llama)
+                else BUILDIN_SHAPES
+            )
 
     @register_benchmark(enabled=HAS_TRITON)
     def _triton(self, xq, wq, x_scale, w_scale) -> Callable: