@@ -522,5 +522,67 @@ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64, "V4fiV2iV4fs",
522
522
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, " V4fiV2iV4fs" , " nc" , " gfx12-insts,wavefrontsize64" )
523
523
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, " V4fiV2iV4fs" , " nc" , " gfx12-insts,wavefrontsize64" )
524
524
525
+ // ===----------------------------------------------------------------------===//
526
+ // WMMA builtins.
527
+ // The w32 postfix means the builtin requires a wavefront size of 32.
528
+ // The w64 postfix means the builtin requires a wavefront size of 64.
529
+ //
530
+ // Some of these are very similar to their GFX11 counterparts, but they don't
531
+ // require replication of the A,B matrices, so they use fewer vector elements.
532
+ // Therefore, we add an "_gfx12" suffix to distinguish them from the existing
533
+ // builtins.
534
+ // ===----------------------------------------------------------------------===//
535
// Wave32 WMMA entries: every name in this group ends in "_w32_gfx12" and every
// entry lists "gfx12-insts,wavefrontsize32" as its last argument.
// The vector operands in the second argument are 8 elements wide here (V8...),
// except the integer (iu8/iu4) and fp8/bf8 variants, whose A/B operands are
// written as "V2i" or plain "i".
// NOTE(review): the second and third arguments are presumably the Clang
// builtin type-signature and attribute strings -- confirm the letter meanings
// against Clang's Builtins.def documentation; they are not defined in this view.
// NOTE(review): each quoted string below starts with a space, while the hunk
// header of this change shows the same kind of string without one
// ("V4fiV2iV4fs"). The spaces look like an extraction artifact -- verify
// against the real file before relying on these literals.
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12, " V8fV8hV8hV8f" , " nc" , " gfx12-insts,wavefrontsize32" )
536
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12, " V8fV8sV8sV8f" , " nc" , " gfx12-insts,wavefrontsize32" )
537
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12, " V8hV8hV8hV8h" , " nc" , " gfx12-insts,wavefrontsize32" )
538
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12, " V8sV8sV8sV8s" , " nc" , " gfx12-insts,wavefrontsize32" )
539
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12, " V8iIbV2iIbV2iV8iIb" , " nc" , " gfx12-insts,wavefrontsize32" )
540
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12, " V8iIbiIbiV8iIb" , " nc" , " gfx12-insts,wavefrontsize32" )
541
+ // These are gfx12-only, but for consistency with the other WMMA variants we're
542
+ // keeping the "_gfx12" suffix.
543
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12, " V8fV2iV2iV8f" , " nc" , " gfx12-insts,wavefrontsize32" )
544
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12, " V8fV2iV2iV8f" , " nc" , " gfx12-insts,wavefrontsize32" )
545
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12, " V8fV2iV2iV8f" , " nc" , " gfx12-insts,wavefrontsize32" )
546
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12, " V8fV2iV2iV8f" , " nc" , " gfx12-insts,wavefrontsize32" )
547
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12, " V8iIbV2iIbV2iV8iIb" , " nc" , " gfx12-insts,wavefrontsize32" )
548
+
549
// Wave64 WMMA entries: every name in this group ends in "_w64_gfx12" and every
// entry lists "gfx12-insts,wavefrontsize64" as its last argument.
// The vector operands in the second argument are 4 elements wide here (V4...);
// the three integer variants (16x16x16 iu8, 16x16x16 iu4, 16x16x32 iu4) share
// one signature string, and the four fp8/bf8 variants share another.
// NOTE(review): the second and third arguments are presumably the Clang
// builtin type-signature and attribute strings -- confirm the letter meanings
// against Clang's Builtins.def documentation; they are not defined in this view.
// NOTE(review): each quoted string below starts with a space, while the hunk
// header of this change shows the same kind of string without one
// ("V4fiV2iV4fs"). The spaces look like an extraction artifact -- verify
// against the real file before relying on these literals.
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12, " V4fV4hV4hV4f" , " nc" , " gfx12-insts,wavefrontsize64" )
550
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12, " V4fV4sV4sV4f" , " nc" , " gfx12-insts,wavefrontsize64" )
551
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12, " V4hV4hV4hV4h" , " nc" , " gfx12-insts,wavefrontsize64" )
552
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12, " V4sV4sV4sV4s" , " nc" , " gfx12-insts,wavefrontsize64" )
553
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12, " V4iIbiIbiV4iIb" , " nc" , " gfx12-insts,wavefrontsize64" )
554
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12, " V4iIbiIbiV4iIb" , " nc" , " gfx12-insts,wavefrontsize64" )
555
+ // These are gfx12-only, but for consistency with the other WMMA variants we're
556
+ // keeping the "_gfx12" suffix.
557
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12, " V4fiiV4f" , " nc" , " gfx12-insts,wavefrontsize64" )
558
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12, " V4fiiV4f" , " nc" , " gfx12-insts,wavefrontsize64" )
559
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12, " V4fiiV4f" , " nc" , " gfx12-insts,wavefrontsize64" )
560
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12, " V4fiiV4f" , " nc" , " gfx12-insts,wavefrontsize64" )
561
+ TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12, " V4iIbiIbiV4iIb" , " nc" , " gfx12-insts,wavefrontsize64" )
562
+
563
// Wave32 SWMMAC entries: names end in "_w32" (no "_gfx12" suffix) and every
// entry lists "gfx12-insts,wavefrontsize32" as its last argument.
// In each signature string the third operand is written wider than the second
// (e.g. V8h then V16h, V2i then V4i), and there is an "s" after the
// accumulator operand (followed by "Ib" on the integer variants).
// NOTE(review): the "s" operand is presumably the sparsity index -- confirm
// against the SWMMAC instruction definition; nothing in this view states it.
// NOTE(review): each quoted string below starts with a space, while the hunk
// header of this change shows the same kind of string without one
// ("V4fiV2iV4fs"). The spaces look like an extraction artifact -- verify
// against the real file before relying on these literals.
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32, " V8fV8hV16hV8fs" , " nc" , " gfx12-insts,wavefrontsize32" )
564
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32, " V8fV8sV16sV8fs" , " nc" , " gfx12-insts,wavefrontsize32" )
565
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32, " V8hV8hV16hV8hs" , " nc" , " gfx12-insts,wavefrontsize32" )
566
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32, " V8sV8sV16sV8ss" , " nc" , " gfx12-insts,wavefrontsize32" )
567
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32, " V8iIbV2iIbV4iV8isIb" , " nc" , " gfx12-insts,wavefrontsize32" )
568
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32, " V8iIbiIbV2iV8isIb" , " nc" , " gfx12-insts,wavefrontsize32" )
569
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32, " V8iIbV2iIbV4iV8isIb" , " nc" , " gfx12-insts,wavefrontsize32" )
570
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32, " V8fV2iV4iV8fs" , " nc" , " gfx12-insts,wavefrontsize32" )
571
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32, " V8fV2iV4iV8fs" , " nc" , " gfx12-insts,wavefrontsize32" )
572
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32, " V8fV2iV4iV8fs" , " nc" , " gfx12-insts,wavefrontsize32" )
573
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32, " V8fV2iV4iV8fs" , " nc" , " gfx12-insts,wavefrontsize32" )
574
+
575
// Wave64 SWMMAC entries: names end in "_w64" (no "_gfx12" suffix) and every
// entry lists "gfx12-insts,wavefrontsize64" as its last argument.
//
// The fp8_bf8, bf8_fp8 and bf8_bf8 w64 entries are intentionally NOT added
// here: they already exist as unchanged context immediately before this added
// region, and declaring the same builtin name twice in the table would
// redefine it.
// NOTE(review): the remaining entries of this group (and possibly the groups
// above) may likewise already exist above the visible hunk -- check the full
// file for further duplicates before landing this change.
// NOTE(review): each quoted string below starts with a space, while the hunk
// header of this change shows the same kind of string without one
// ("V4fiV2iV4fs"). The spaces look like an extraction artifact -- verify
// against the real file before relying on these literals.
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64, " V4fV4hV8hV4fs" , " nc" , " gfx12-insts,wavefrontsize64" )
576
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64, " V4fV4sV8sV4fs" , " nc" , " gfx12-insts,wavefrontsize64" )
577
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64, " V4hV4hV8hV4hs" , " nc" , " gfx12-insts,wavefrontsize64" )
578
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64, " V4sV4sV8sV4ss" , " nc" , " gfx12-insts,wavefrontsize64" )
579
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64, " V4iIbiIbV2iV4isIb" , " nc" , " gfx12-insts,wavefrontsize64" )
580
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64, " V4iIbiIbiV4isIb" , " nc" , " gfx12-insts,wavefrontsize64" )
581
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64, " V4iIbiIbV2iV4isIb" , " nc" , " gfx12-insts,wavefrontsize64" )
582
+ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64, " V4fiV2iV4fs" , " nc" , " gfx12-insts,wavefrontsize64" )
586
+
525
587
// Undefine the two table macros at the end of the entry list.
// NOTE(review): presumably so that each file including this table can define
// BUILTIN / TARGET_BUILTIN afresh -- confirm against the includers.
#undef BUILTIN
526
588
#undef TARGET_BUILTIN
0 commit comments