@@ -520,5 +520,67 @@ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64, "V4fiV2iV4fs",
520
520
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, " V4fiV2iV4fs" , " nc" , " gfx12-insts,wavefrontsize64" )
521
521
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, " V4fiV2iV4fs" , " nc" , " gfx12-insts,wavefrontsize64" )
522
522
523
+ // ===----------------------------------------------------------------------===//
524
+ // WMMA builtins.
525
+ // Postfix w32 indicates the builtin requires wavefront size of 32.
526
+ // Postfix w64 indicates the builtin requires wavefront size of 64.
527
+ //
528
+ // Some of these are very similar to their GFX11 counterparts, but they don't
529
+ // require replication of the A,B matrices, so they use fewer vector elements.
530
+ // Therefore, we add an "_gfx12" suffix to distinguish them from the existing
531
+ // builtins.
532
+ // ===----------------------------------------------------------------------===//
533
// WMMA entries, wavefront-size-32 forms. Every entry in this group has the
// "_w32_gfx12" name suffix, the same attribute string, and a feature string
// naming gfx12-insts and wavefrontsize32.
// The floating-point forms take 8-element vector operands (V8f/V8h/V8s); the
// iu8/iu4/fp8/bf8 forms take int or 2-element int vector operands (i / V2i)
// and return or accumulate into V8i / V8f.
// NOTE(review): the "Ib" operands on the iu8/iu4 forms are presumably
// constant-bool flags in Clang's builtin type-string encoding — confirm
// against the encoding description in Builtins.def.
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12, " V8fV8hV8hV8f" , " nc" , " gfx12-insts,wavefrontsize32" )
534
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12, " V8fV8sV8sV8f" , " nc" , " gfx12-insts,wavefrontsize32" )
535
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12, " V8hV8hV8hV8h" , " nc" , " gfx12-insts,wavefrontsize32" )
536
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12, " V8sV8sV8sV8s" , " nc" , " gfx12-insts,wavefrontsize32" )
537
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12, " V8iIbV2iIbV2iV8iIb" , " nc" , " gfx12-insts,wavefrontsize32" )
538
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12, " V8iIbiIbiV8iIb" , " nc" , " gfx12-insts,wavefrontsize32" )
539
+ // These are gfx12-only, but for consistency with the other WMMA variants we're
540
+ // keeping the "_gfx12" suffix.
541
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12, " V8fV2iV2iV8f" , " nc" , " gfx12-insts,wavefrontsize32" )
542
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12, " V8fV2iV2iV8f" , " nc" , " gfx12-insts,wavefrontsize32" )
543
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12, " V8fV2iV2iV8f" , " nc" , " gfx12-insts,wavefrontsize32" )
544
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12, " V8fV2iV2iV8f" , " nc" , " gfx12-insts,wavefrontsize32" )
545
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12, " V8iIbV2iIbV2iV8iIb" , " nc" , " gfx12-insts,wavefrontsize32" )
546
+
547
// WMMA entries, wavefront-size-64 forms. Every entry in this group has the
// "_w64_gfx12" name suffix and a feature string naming gfx12-insts and
// wavefrontsize64.
// The type strings here use 4-element vectors (V4f/V4h/V4s/V4i), and the
// iu8/iu4/fp8/bf8 matrix operands are plain ints (i).
// NOTE(review): the three integer forms below (16x16x16 iu8, 16x16x16 iu4,
// 16x16x32 iu4) all carry the identical type string "V4iIbiIbiV4iIb"; that is
// what the table says, but worth confirming it is intended for each shape.
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12, " V4fV4hV4hV4f" , " nc" , " gfx12-insts,wavefrontsize64" )
548
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12, " V4fV4sV4sV4f" , " nc" , " gfx12-insts,wavefrontsize64" )
549
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12, " V4hV4hV4hV4h" , " nc" , " gfx12-insts,wavefrontsize64" )
550
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12, " V4sV4sV4sV4s" , " nc" , " gfx12-insts,wavefrontsize64" )
551
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12, " V4iIbiIbiV4iIb" , " nc" , " gfx12-insts,wavefrontsize64" )
552
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12, " V4iIbiIbiV4iIb" , " nc" , " gfx12-insts,wavefrontsize64" )
553
+ // These are gfx12-only, but for consistency with the other WMMA variants we're
554
+ // keeping the "_gfx12" suffix.
555
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12, " V4fiiV4f" , " nc" , " gfx12-insts,wavefrontsize64" )
556
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12, " V4fiiV4f" , " nc" , " gfx12-insts,wavefrontsize64" )
557
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12, " V4fiiV4f" , " nc" , " gfx12-insts,wavefrontsize64" )
558
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12, " V4fiiV4f" , " nc" , " gfx12-insts,wavefrontsize64" )
559
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12, " V4iIbiIbiV4iIb" , " nc" , " gfx12-insts,wavefrontsize64" )
560
+
561
// SWMMAC entries, wavefront-size-32 forms. Names end in "_w32" with no
// "_gfx12" suffix, and every feature string names gfx12-insts and
// wavefrontsize32.
// In each type string the second matrix operand is wider than the first
// (V8h vs V16h, V8s vs V16s, V2i vs V4i, i vs V2i), and there is one extra
// "s" operand after the accumulator compared with the WMMA type strings.
// NOTE(review): the extra "s" operand is presumably the sparsity index —
// confirm against the instruction documentation.
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32, " V8fV8hV16hV8fs" , " nc" , " gfx12-insts,wavefrontsize32" )
562
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32, " V8fV8sV16sV8fs" , " nc" , " gfx12-insts,wavefrontsize32" )
563
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32, " V8hV8hV16hV8hs" , " nc" , " gfx12-insts,wavefrontsize32" )
564
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32, " V8sV8sV16sV8ss" , " nc" , " gfx12-insts,wavefrontsize32" )
565
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32, " V8iIbV2iIbV4iV8isIb" , " nc" , " gfx12-insts,wavefrontsize32" )
566
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32, " V8iIbiIbV2iV8isIb" , " nc" , " gfx12-insts,wavefrontsize32" )
567
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32, " V8iIbV2iIbV4iV8isIb" , " nc" , " gfx12-insts,wavefrontsize32" )
568
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32, " V8fV2iV4iV8fs" , " nc" , " gfx12-insts,wavefrontsize32" )
569
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32, " V8fV2iV4iV8fs" , " nc" , " gfx12-insts,wavefrontsize32" )
570
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32, " V8fV2iV4iV8fs" , " nc" , " gfx12-insts,wavefrontsize32" )
571
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32, " V8fV2iV4iV8fs" , " nc" , " gfx12-insts,wavefrontsize32" )
572
+
573
// SWMMAC entries, wavefront-size-64 forms. Names end in "_w64", every feature
// string names gfx12-insts and wavefrontsize64, and the type strings use
// 4-element result/accumulator vectors (V4f/V4h/V4s/V4i).
// NOTE(review): the last two entries in this group
// (..._bf8_fp8_w64 and ..._bf8_bf8_w64) have the same names and arguments as
// the two unchanged TARGET_BUILTIN context lines at the top of this hunk
// (old/new lines 520-521), and the hunk header shows ..._fp8_bf8_w64 as
// already present too. This looks like the SWMMAC list may be getting added a
// second time — confirm the file does not end up defining these builtins
// twice.
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64, " V4fV4hV8hV4fs" , " nc" , " gfx12-insts,wavefrontsize64" )
574
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64, " V4fV4sV8sV4fs" , " nc" , " gfx12-insts,wavefrontsize64" )
575
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64, " V4hV4hV8hV4hs" , " nc" , " gfx12-insts,wavefrontsize64" )
576
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64, " V4sV4sV8sV4ss" , " nc" , " gfx12-insts,wavefrontsize64" )
577
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64, " V4iIbiIbV2iV4isIb" , " nc" , " gfx12-insts,wavefrontsize64" )
578
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64, " V4iIbiIbiV4isIb" , " nc" , " gfx12-insts,wavefrontsize64" )
579
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64, " V4iIbiIbV2iV4isIb" , " nc" , " gfx12-insts,wavefrontsize64" )
580
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64, " V4fiV2iV4fs" , " nc" , " gfx12-insts,wavefrontsize64" )
581
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64, " V4fiV2iV4fs" , " nc" , " gfx12-insts,wavefrontsize64" )
582
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, " V4fiV2iV4fs" , " nc" , " gfx12-insts,wavefrontsize64" )
583
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, " V4fiV2iV4fs" , " nc" , " gfx12-insts,wavefrontsize64" )
584
+
523
585
// Undefine both list macros now that the entries above have been expanded.
// NOTE(review): presumably this lets each includer supply its own
// BUILTIN/TARGET_BUILTIN definitions before including this file — confirm
// against the include sites.
#undef BUILTIN
524
586
#undef TARGET_BUILTIN
0 commit comments