Skip to content

[AMDGPU][MC][True16] VOP3dot instruction update for true16/fake16 #113474

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 9, 2024

Conversation

broxigarchen
Copy link
Contributor

@broxigarchen broxigarchen commented Oct 23, 2024

Update the VOP3dot instructions to use the true16 and fake16 formats.

This patch covers the following instructions:
  • v_dot2_f16_f16
  • v_dot2_bf16_bf16

@broxigarchen broxigarchen marked this pull request as ready for review October 23, 2024 18:05
@llvmbot llvmbot added backend:AMDGPU mc Machine (object) code labels Oct 23, 2024
@llvmbot
Copy link
Member

llvmbot commented Oct 23, 2024

@llvm/pr-subscribers-mc

Author: Brox Chen (broxigarchen)

Changes

Patch is 171.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113474.diff

14 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+39-7)
  • (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+29-12)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3.s (+78-66)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s (+98-25)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s (+96-26)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3.s (+57-33)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s (+34-22)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s (+32-20)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt (+160-37)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt (+190-21)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt (+184-21)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt (+140-29)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt (+70-10)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt (+70-10)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 34ecdb56e8689d..caceb251f719b1 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -931,6 +931,30 @@ class VOP3_DOT_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> {
   let HasOMod = 0;
 }
 
+class VOP3_DOT_Profile_t16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile_True16<P, Features> {
+  let HasClamp = 0;
+  let HasOMod = 0;
+  // Override modifiers for bf16(i16) (same as float modifiers).
+  let HasSrc0Mods = 1;
+  let HasSrc1Mods = 1;
+  let HasSrc2Mods = 1;
+  let Src0ModVOP3DPP = FPVRegInputMods;
+  let Src1ModVOP3DPP = FP32VCSrcInputMods;
+  let Src2ModVOP3DPP = FPT16VCSrcInputMods</*IsFake16*/0>;
+}
+
+class VOP3_DOT_Profile_fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile_Fake16<P, Features> {
+  let HasClamp = 0;
+  let HasOMod = 0;
+  // Override modifiers for bf16(i16) (same as float modifiers).
+  let HasSrc0Mods = 1;
+  let HasSrc1Mods = 1;
+  let HasSrc2Mods = 1;
+  let AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
+   HasOpSel, HasOMod, IsVOP3P, HasModifiers, 1/*HasSrc0Mods*/, 1/*HasSrc1Mods*/,
+   1/*HasSrc2Mods*/, DstVT>.ret;
+}
+
 let SubtargetPredicate = isGFX11Plus in {
   defm V_MAXMIN_F32     : VOP3Inst<"v_maxmin_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
   defm V_MINMAX_F32     : VOP3Inst<"v_minmax_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
@@ -951,9 +975,15 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
   defm V_MINIMUMMAXIMUM_F16 : VOP3Inst<"v_minimummaximum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
 } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
 
-let OtherPredicates = [HasDot9Insts], IsDOT=1 in {
-  defm V_DOT2_F16_F16 :   VOP3Inst<"v_dot2_f16_f16",   VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>, int_amdgcn_fdot2_f16_f16>;
-  defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_BF16_V2BF16_V2BF16_BF16>, int_amdgcn_fdot2_bf16_bf16>;
+let SubtargetPredicate = HasDot9Insts, IsDOT=1 in {
+  defm V_DOT2_F16_F16 :   VOP3Inst_t16_with_profiles<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>,
+                                                      VOP3_DOT_Profile_t16<VOP_F16_V2F16_V2F16_F16>,
+                                                      VOP3_DOT_Profile_fake16<VOP_F16_V2F16_V2F16_F16>,
+                                                      int_amdgcn_fdot2_f16_f16>;
+  defm V_DOT2_BF16_BF16 : VOP3Inst_t16_with_profiles<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_BF16_V2BF16_V2BF16_BF16>,
+                                                      VOP3_DOT_Profile_t16<VOP_BF16_V2BF16_V2BF16_BF16>,
+                                                      VOP3_DOT_Profile_fake16<VOP_BF16_V2BF16_V2BF16_BF16>,
+                                                      int_amdgcn_fdot2_bf16_bf16>;
 }
 
 class VOP_Pseudo_Scalar<RegisterClass Dst, RegisterOperand SrcOp,
@@ -1112,8 +1142,10 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
   VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
   VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
 
-multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
-  VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
+multiclass VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME> {
+  defm _t16: VOP3Dot_Realtriple_gfx11_gfx12<op, asmName, 0, opName#"_t16">;
+  defm _fake16: VOP3Dot_Realtriple_gfx11_gfx12<op, asmName, 0, opName#"_fake16">;
+}
 
 multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
                                            string pseudo_mnemonic = "", bit isSingle = 0> :
@@ -1205,8 +1237,8 @@ defm V_MAXMIN_U32          : VOP3_Realtriple_gfx11_gfx12<0x262>;
 defm V_MINMAX_U32          : VOP3_Realtriple_gfx11_gfx12<0x263>;
 defm V_MAXMIN_I32          : VOP3_Realtriple_gfx11_gfx12<0x264>;
 defm V_MINMAX_I32          : VOP3_Realtriple_gfx11_gfx12<0x265>;
-defm V_DOT2_F16_F16        : VOP3Dot_Realtriple_gfx11_gfx12<0x266>;
-defm V_DOT2_BF16_BF16      : VOP3Dot_Realtriple_gfx11_gfx12<0x267>;
+defm V_DOT2_F16_F16        : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x266, "v_dot2_f16_f16">;
+defm V_DOT2_BF16_BF16      : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x267, "v_dot2_bf16_bf16">;
 defm V_DIV_SCALE_F32       : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
 defm V_DIV_SCALE_F64       : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
 defm V_MAD_U64_U32_gfx11   : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index aab5dc7465d938..58dc812bc3ef6a 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -347,9 +347,12 @@ class VOP3FP8OpSel_src_bytesel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3e_gf
    let Inst{14-13} = byte_sel;  // op_sel2/3
  }
 
-class VOP3DotOpSel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3OpSel_gfx11_gfx12<op, p>{
+class VOP3DotOpSel_gfx11_gfx12<bits<10> op, VOPProfile p> :
+    VOP3e_t16_gfx11_gfx12<op, p>{
   let Inst{11} = ?;
   let Inst{12} = ?;
+  let Inst{13} = !if(p.HasSrc2Mods, src2_modifiers{2}, 0);
+  let Inst{14} = !if(!and(p.HasDst, p.HasSrc0Mods),  src0_modifiers{3}, 0);
 }
 
 // NB: For V_INTERP* opcodes, src0 is encoded as src1 and vice versa
@@ -1611,10 +1614,12 @@ multiclass VOP3_Real_Base<GFXGen Gen, bits<10> op, string opName = NAME,
   }
 }
 
-multiclass VOP3Dot_Real_Base<GFXGen Gen, bits<10> op, string opName = NAME,
+multiclass VOP3Dot_Real_Base<GFXGen Gen, bits<10> op, string asmName, string opName = NAME,
                              bit isSingle = 0> {
   defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
-  let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
+  let AsmString = asmName # ps.AsmOperands,
+      DecoderNamespace = Gen.DecoderNamespace # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"),
+      IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
     def _e64#Gen.Suffix :
       VOP3_Real_Gen<ps, Gen>,
       VOP3DotOpSel_gfx11_gfx12<op, ps.Pfl>;
@@ -1678,9 +1683,13 @@ multiclass VOP3_Real_dpp_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
       VOP3_DPP16_Gen<op, ps, Gen>;
 }
 
-multiclass VOP3Dot_Real_dpp_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
+multiclass VOP3Dot_Real_dpp_Base<GFXGen Gen, bits<10> op, string asmName, string opName = NAME> {
+  defvar ps = !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp");
   def _e64_dpp#Gen.Suffix :
-    VOP3_DPP16_Gen<op, !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp"), Gen> {
+    VOP3_DPP16_Gen_t16<op, ps, Gen> {
+      let AsmString = asmName # ps.Pfl.AsmVOP3DPP16;
+      let DecoderNamespace = Gen.DecoderNamespace
+                             # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
       let Inst{11} = ?;
       let Inst{12} = ?;
     }
@@ -1702,12 +1711,14 @@ multiclass VOP3_Real_dpp8_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
   }
 }
 
-multiclass VOP3Dot_Real_dpp8_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
+multiclass VOP3Dot_Real_dpp8_Base<GFXGen Gen, bits<10> op, string asmName, string opName = NAME> {
   defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
-  def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8<op, ps> {
+  def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8_t16<op, ps> {
     let Inst{11} = ?;
     let Inst{12} = ?;
-    let DecoderNamespace = Gen.DecoderNamespace;
+    let AsmString = asmName # ps.Pfl.AsmVOP3DPP8;
+    let DecoderNamespace = Gen.DecoderNamespace
+                           # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
     let AssemblerPredicate = Gen.AssemblerPredicate;
   }
 }
@@ -1760,11 +1771,11 @@ multiclass VOP3_Realtriple<GFXGen Gen, bits<10> op, bit isSingle = 0,
   VOP3_Real_dpp_Base<Gen, op, opName>,
   VOP3_Real_dpp8_Base<Gen, op, opName>;
 
-multiclass VOP3Dot_Realtriple<GFXGen Gen, bits<10> op, bit isSingle = 0,
+multiclass VOP3Dot_Realtriple<GFXGen Gen, bits<10> op, string asmName, bit isSingle = 0,
                               string opName = NAME> :
-  VOP3Dot_Real_Base<Gen, op, opName, isSingle>,
-  VOP3Dot_Real_dpp_Base<Gen, op, opName>,
-  VOP3Dot_Real_dpp8_Base<Gen, op, opName>;
+  VOP3Dot_Real_Base<Gen, op, asmName, opName, isSingle>,
+  VOP3Dot_Real_dpp_Base<Gen, op, asmName, opName>,
+  VOP3Dot_Real_dpp8_Base<Gen, op, asmName, opName>;
 
 multiclass VOP3Only_Realtriple<GFXGen Gen, bits<10> op> :
   VOP3_Realtriple<Gen, op, 1>;
@@ -1862,6 +1873,12 @@ multiclass VOP3Only_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
   VOP3Only_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
   VOP3Only_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
 
+multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op, string asmName, bit isSingle = 0,
+                                          string opName = NAME> :
+  VOP3Dot_Realtriple<GFX11Gen, op, asmName, isSingle, opName>,
+  VOP3Dot_Realtriple<GFX12Gen, op, asmName, isSingle, opName>;
+
+
 //===----------------------------------------------------------------------===//
 
 include "VOPCInstructions.td"
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index 210d55898367d8..1f6dc01b5d6e96 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -2119,104 +2119,116 @@ v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2
 // W64: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
 
-v_dot2_bf16_bf16 v5, v1, v2, s3
-// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00]
+v_dot2_bf16_bf16 v5.l, v1, v2, s3
+// GFX11: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00]
 
-v_dot2_bf16_bf16 v5, v255, v255, s105
-// GFX11: encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01]
+v_dot2_bf16_bf16 v5.l, v255, v255, s105
+// GFX11: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01]
 
-v_dot2_bf16_bf16 v5, s1, s2, v3
-// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04]
+v_dot2_bf16_bf16 v5.l, s1, s2, v3.l
+// GFX11: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04]
 
-v_dot2_bf16_bf16 v5, s105, s105, m0
-// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01]
+v_dot2_bf16_bf16 v5.l, s105, s105, m0
+// GFX11: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01]
 
-v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255
-// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
+v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h
+// GFX11: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
 
-v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi
-// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
+v_dot2_bf16_bf16 v5.l, vcc_hi, 0xfe0b, vcc_hi
+// GFX11: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
 
-v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15
-// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01]
+v_dot2_bf16_bf16 v5.l, ttmp15, src_scc, ttmp15
+// GFX11: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01]
 
-v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo
-// GFX11: encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81]
+v_dot2_bf16_bf16 v5.l, |m0|, -1, -vcc_lo
+// GFX11: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81]
 
-v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b|
-// GFX11: encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00]
+v_dot2_bf16_bf16 v5.l, -|exec_lo|, null, -|0xfe0b|
+// GFX11: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00]
 
-v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo|
-// GFX11: encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1]
+v_dot2_bf16_bf16 v5.l, -|exec_hi|, -|exec_lo|, -|exec_lo|
+// GFX11: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1]
 
-v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc|
-// GFX11: encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43]
+v_dot2_bf16_bf16 v5.l, null, -exec_hi, |src_scc|
+// GFX11: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43]
 
-v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| op_sel:[0,0,0,0]
-// GFX11: encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1]
+v_dot2_bf16_bf16 v5.l, -1, -|m0|, -|exec_hi|
+// GFX11: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1]
 
-v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
-// GFX11: encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23]
+v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1
+// GFX11: [0x05,0x02,0x67,0xd6,0xfd,0xd4,0x04,0x23]
 
-v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
-// GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null
+// GFX11: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 
-v_dot2_bf16_bf16 v2, v0, 0x20004000, v2
-// GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+v_dot2_bf16_bf16 v2.l, v0, 0x20004000, v2.l
+// GFX11: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
 
-v_dot2_bf16_bf16 v2, 0x20004000, v0, v2
-// GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+v_dot2_bf16_bf16 v2.l, 0x20004000, v0, v2.l
+// GFX11: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
 
-v_dot2_f16_f16 v5, v1, v2, s3
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
+v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h
+// GFX11: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
 
-v_dot2_f16_f16 v5, v255, s2, s105
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01]
+v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null
+// GFX11: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 
-v_dot2_f16_f16 v5, s1, v255, exec_hi
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01]
+v_dot2_f16_f16 v5.l, v1, v2, s3
+// GFX11: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
 
-v_dot2_f16_f16 v5, s105, s105, exec_lo
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01]
+v_dot2_f16_f16 v5.l, v255, s2, s105
+// GFX11: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01]
 
-v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04]
+v_dot2_f16_f16 v5.l, s1, v255, exec_hi
+// GFX11: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01]
 
-v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+v_dot2_f16_f16 v5.l, s105, s105, exec_lo
+// GFX11: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01]
 
-v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
-// GFX11: encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1]
+v_dot2_f16_f16 v5.l, vcc_lo, ttmp15, v3.l
+// GFX11: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04]
 
-v_dot2_f16_f16 v5, m0, 0.5, m0
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01]
+v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h
+// GFX11: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
 
-v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi
-// GFX11: encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01]
+v_dot2_f16_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1]
 
-v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo|
-// GFX11: encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1]
+v_dot2_f16_f16 v5.l, m0, 0.5, m0
+// GFX11: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01]
 
-v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b|
-// GFX11: encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+v_dot2_f16_f16 v5.l, |exec_lo|, -1, vcc_hi
+// GFX11: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01]
 
-v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc|
-// GFX11: encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3]
+v_dot2_f16_f16 v5.l, -|exec_hi|, null, -|vcc_lo|
+// GFX11: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1]
 
-v_dot2_f16_f16 v5, 0.5, -m0, 0.5 op_sel:[0,0,0,0]
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43]
+v_dot2_f16_f16 v5.l, null, exec_lo, -|0xfe0b|
+// GFX11: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
 
-v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
-// GFX11: encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23]
+v_dot2_f16_f16 v5.l, -1, -|exec_hi|, -|src_scc|
+// GFX11: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3]
 
-v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
-// GFX11: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+v_dot2_f16_f16 v5.l, 0.5, -m0, 0.5
+// GFX11: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43]
 
-v_dot2_f16_f16 v2, v0, 0x20004000, v2
-// GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1
+// GFX11: [0x05,0x02,0x66,0xd6,0xfd,0xd4,0x04,0x23]
 
-v_dot2_f16_f16 v2, 0x20004000, v0, v2
-// GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null
+// GFX11: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v2.l, v0, 0x20004000, v2.l
+// GFX11: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+
+v_dot2_f16_f16 v2.l, 0x20004000, v0, v2.l
+// GFX11: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+
+v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h
+// GFX11: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null
+// GFX11: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 
 v_fma_dx9_zero_f32 v5, v1, v2, s3
 // GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
index c82b61e21edf64..ba594e7406fc52 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
@@ -4700,44 +4700,117 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 ban
 v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
 // GFX11: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
-v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
-// GFX11: encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
+
+v_dot2_f16_f16_e64_dpp v0.l, s1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_f16_f16_e64_dpp v0.l, v1, s2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
-v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
 // GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
 
-v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+v_dot2_bf16_bf16_e64_dpp v0.l, s1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
 // GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
-v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+v_dot2_bf16_bf16_e64_dpp v0.l, v1, s2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
 // GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
-v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
-// GFX11: encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v3.l quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
 
-v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
-// GFX11: encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00]
+v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xf...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Oct 23, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Brox Chen (broxigarchen)

Changes

Patch is 171.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113474.diff

14 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+39-7)
  • (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+29-12)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3.s (+78-66)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s (+98-25)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s (+96-26)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3.s (+57-33)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s (+34-22)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s (+32-20)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt (+160-37)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt (+190-21)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt (+184-21)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt (+140-29)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt (+70-10)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt (+70-10)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 34ecdb56e8689d..caceb251f719b1 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -931,6 +931,30 @@ class VOP3_DOT_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> {
   let HasOMod = 0;
 }
 
+class VOP3_DOT_Profile_t16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile_True16<P, Features> {
+  let HasClamp = 0;
+  let HasOMod = 0;
+  // Override modifiers for bf16(i16) (same as float modifiers).
+  let HasSrc0Mods = 1;
+  let HasSrc1Mods = 1;
+  let HasSrc2Mods = 1;
+  let Src0ModVOP3DPP = FPVRegInputMods;
+  let Src1ModVOP3DPP = FP32VCSrcInputMods;
+  let Src2ModVOP3DPP = FPT16VCSrcInputMods</*IsFake16*/0>;
+}
+
+class VOP3_DOT_Profile_fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile_Fake16<P, Features> {
+  let HasClamp = 0;
+  let HasOMod = 0;
+  // Override modifiers for bf16(i16) (same as float modifiers).
+  let HasSrc0Mods = 1;
+  let HasSrc1Mods = 1;
+  let HasSrc2Mods = 1;
+  let AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
+   HasOpSel, HasOMod, IsVOP3P, HasModifiers, 1/*HasSrc0Mods*/, 1/*HasSrc1Mods*/,
+   1/*HasSrc2Mods*/, DstVT>.ret;
+}
+
 let SubtargetPredicate = isGFX11Plus in {
   defm V_MAXMIN_F32     : VOP3Inst<"v_maxmin_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
   defm V_MINMAX_F32     : VOP3Inst<"v_minmax_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
@@ -951,9 +975,15 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
   defm V_MINIMUMMAXIMUM_F16 : VOP3Inst<"v_minimummaximum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
 } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
 
-let OtherPredicates = [HasDot9Insts], IsDOT=1 in {
-  defm V_DOT2_F16_F16 :   VOP3Inst<"v_dot2_f16_f16",   VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>, int_amdgcn_fdot2_f16_f16>;
-  defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_BF16_V2BF16_V2BF16_BF16>, int_amdgcn_fdot2_bf16_bf16>;
+let SubtargetPredicate = HasDot9Insts, IsDOT=1 in {
+  defm V_DOT2_F16_F16 :   VOP3Inst_t16_with_profiles<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>,
+                                                      VOP3_DOT_Profile_t16<VOP_F16_V2F16_V2F16_F16>,
+                                                      VOP3_DOT_Profile_fake16<VOP_F16_V2F16_V2F16_F16>,
+                                                      int_amdgcn_fdot2_f16_f16>;
+  defm V_DOT2_BF16_BF16 : VOP3Inst_t16_with_profiles<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_BF16_V2BF16_V2BF16_BF16>,
+                                                      VOP3_DOT_Profile_t16<VOP_BF16_V2BF16_V2BF16_BF16>,
+                                                      VOP3_DOT_Profile_fake16<VOP_BF16_V2BF16_V2BF16_BF16>,
+                                                      int_amdgcn_fdot2_bf16_bf16>;
 }
 
 class VOP_Pseudo_Scalar<RegisterClass Dst, RegisterOperand SrcOp,
@@ -1112,8 +1142,10 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
   VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
   VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
 
-multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
-  VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
+multiclass VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME> {
+  defm _t16: VOP3Dot_Realtriple_gfx11_gfx12<op, asmName, 0, opName#"_t16">;
+  defm _fake16: VOP3Dot_Realtriple_gfx11_gfx12<op, asmName, 0, opName#"_fake16">;
+}
 
 multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
                                            string pseudo_mnemonic = "", bit isSingle = 0> :
@@ -1205,8 +1237,8 @@ defm V_MAXMIN_U32          : VOP3_Realtriple_gfx11_gfx12<0x262>;
 defm V_MINMAX_U32          : VOP3_Realtriple_gfx11_gfx12<0x263>;
 defm V_MAXMIN_I32          : VOP3_Realtriple_gfx11_gfx12<0x264>;
 defm V_MINMAX_I32          : VOP3_Realtriple_gfx11_gfx12<0x265>;
-defm V_DOT2_F16_F16        : VOP3Dot_Realtriple_gfx11_gfx12<0x266>;
-defm V_DOT2_BF16_BF16      : VOP3Dot_Realtriple_gfx11_gfx12<0x267>;
+defm V_DOT2_F16_F16        : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x266, "v_dot2_f16_f16">;
+defm V_DOT2_BF16_BF16      : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x267, "v_dot2_bf16_bf16">;
 defm V_DIV_SCALE_F32       : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
 defm V_DIV_SCALE_F64       : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
 defm V_MAD_U64_U32_gfx11   : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index aab5dc7465d938..58dc812bc3ef6a 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -347,9 +347,12 @@ class VOP3FP8OpSel_src_bytesel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3e_gf
    let Inst{14-13} = byte_sel;  // op_sel2/3
  }
 
-class VOP3DotOpSel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3OpSel_gfx11_gfx12<op, p>{
+class VOP3DotOpSel_gfx11_gfx12<bits<10> op, VOPProfile p> :
+    VOP3e_t16_gfx11_gfx12<op, p>{
   let Inst{11} = ?;
   let Inst{12} = ?;
+  let Inst{13} = !if(p.HasSrc2Mods, src2_modifiers{2}, 0);
+  let Inst{14} = !if(!and(p.HasDst, p.HasSrc0Mods),  src0_modifiers{3}, 0);
 }
 
 // NB: For V_INTERP* opcodes, src0 is encoded as src1 and vice versa
@@ -1611,10 +1614,12 @@ multiclass VOP3_Real_Base<GFXGen Gen, bits<10> op, string opName = NAME,
   }
 }
 
-multiclass VOP3Dot_Real_Base<GFXGen Gen, bits<10> op, string opName = NAME,
+multiclass VOP3Dot_Real_Base<GFXGen Gen, bits<10> op, string asmName, string opName = NAME,
                              bit isSingle = 0> {
   defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
-  let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
+  let AsmString = asmName # ps.AsmOperands,
+      DecoderNamespace = Gen.DecoderNamespace # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"),
+      IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
     def _e64#Gen.Suffix :
       VOP3_Real_Gen<ps, Gen>,
       VOP3DotOpSel_gfx11_gfx12<op, ps.Pfl>;
@@ -1678,9 +1683,13 @@ multiclass VOP3_Real_dpp_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
       VOP3_DPP16_Gen<op, ps, Gen>;
 }
 
-multiclass VOP3Dot_Real_dpp_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
+multiclass VOP3Dot_Real_dpp_Base<GFXGen Gen, bits<10> op, string asmName, string opName = NAME> {
+  defvar ps = !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp");
   def _e64_dpp#Gen.Suffix :
-    VOP3_DPP16_Gen<op, !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp"), Gen> {
+    VOP3_DPP16_Gen_t16<op, ps, Gen> {
+      let AsmString = asmName # ps.Pfl.AsmVOP3DPP16;
+      let DecoderNamespace = Gen.DecoderNamespace
+                             # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
       let Inst{11} = ?;
       let Inst{12} = ?;
     }
@@ -1702,12 +1711,14 @@ multiclass VOP3_Real_dpp8_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
   }
 }
 
-multiclass VOP3Dot_Real_dpp8_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
+multiclass VOP3Dot_Real_dpp8_Base<GFXGen Gen, bits<10> op, string asmName, string opName = NAME> {
   defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
-  def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8<op, ps> {
+  def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8_t16<op, ps> {
     let Inst{11} = ?;
     let Inst{12} = ?;
-    let DecoderNamespace = Gen.DecoderNamespace;
+    let AsmString = asmName # ps.Pfl.AsmVOP3DPP8;
+    let DecoderNamespace = Gen.DecoderNamespace
+                           # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
     let AssemblerPredicate = Gen.AssemblerPredicate;
   }
 }
@@ -1760,11 +1771,11 @@ multiclass VOP3_Realtriple<GFXGen Gen, bits<10> op, bit isSingle = 0,
   VOP3_Real_dpp_Base<Gen, op, opName>,
   VOP3_Real_dpp8_Base<Gen, op, opName>;
 
-multiclass VOP3Dot_Realtriple<GFXGen Gen, bits<10> op, bit isSingle = 0,
+multiclass VOP3Dot_Realtriple<GFXGen Gen, bits<10> op, string asmName, bit isSingle = 0,
                               string opName = NAME> :
-  VOP3Dot_Real_Base<Gen, op, opName, isSingle>,
-  VOP3Dot_Real_dpp_Base<Gen, op, opName>,
-  VOP3Dot_Real_dpp8_Base<Gen, op, opName>;
+  VOP3Dot_Real_Base<Gen, op, asmName, opName, isSingle>,
+  VOP3Dot_Real_dpp_Base<Gen, op, asmName, opName>,
+  VOP3Dot_Real_dpp8_Base<Gen, op, asmName, opName>;
 
 multiclass VOP3Only_Realtriple<GFXGen Gen, bits<10> op> :
   VOP3_Realtriple<Gen, op, 1>;
@@ -1862,6 +1873,12 @@ multiclass VOP3Only_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
   VOP3Only_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
   VOP3Only_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
 
+multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op, string asmName, bit isSingle = 0,
+                                          string opName = NAME> :
+  VOP3Dot_Realtriple<GFX11Gen, op, asmName, isSingle, opName>,
+  VOP3Dot_Realtriple<GFX12Gen, op, asmName, isSingle, opName>;
+
+
 //===----------------------------------------------------------------------===//
 
 include "VOPCInstructions.td"
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index 210d55898367d8..1f6dc01b5d6e96 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -2119,104 +2119,116 @@ v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2
 // W64: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
 // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
 
-v_dot2_bf16_bf16 v5, v1, v2, s3
-// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00]
+v_dot2_bf16_bf16 v5.l, v1, v2, s3
+// GFX11: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00]
 
-v_dot2_bf16_bf16 v5, v255, v255, s105
-// GFX11: encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01]
+v_dot2_bf16_bf16 v5.l, v255, v255, s105
+// GFX11: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01]
 
-v_dot2_bf16_bf16 v5, s1, s2, v3
-// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04]
+v_dot2_bf16_bf16 v5.l, s1, s2, v3.l
+// GFX11: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04]
 
-v_dot2_bf16_bf16 v5, s105, s105, m0
-// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01]
+v_dot2_bf16_bf16 v5.l, s105, s105, m0
+// GFX11: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01]
 
-v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255
-// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
+v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h
+// GFX11: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
 
-v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi
-// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
+v_dot2_bf16_bf16 v5.l, vcc_hi, 0xfe0b, vcc_hi
+// GFX11: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
 
-v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15
-// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01]
+v_dot2_bf16_bf16 v5.l, ttmp15, src_scc, ttmp15
+// GFX11: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01]
 
-v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo
-// GFX11: encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81]
+v_dot2_bf16_bf16 v5.l, |m0|, -1, -vcc_lo
+// GFX11: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81]
 
-v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b|
-// GFX11: encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00]
+v_dot2_bf16_bf16 v5.l, -|exec_lo|, null, -|0xfe0b|
+// GFX11: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00]
 
-v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo|
-// GFX11: encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1]
+v_dot2_bf16_bf16 v5.l, -|exec_hi|, -|exec_lo|, -|exec_lo|
+// GFX11: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1]
 
-v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc|
-// GFX11: encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43]
+v_dot2_bf16_bf16 v5.l, null, -exec_hi, |src_scc|
+// GFX11: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43]
 
-v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| op_sel:[0,0,0,0]
-// GFX11: encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1]
+v_dot2_bf16_bf16 v5.l, -1, -|m0|, -|exec_hi|
+// GFX11: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1]
 
-v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
-// GFX11: encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23]
+v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1
+// GFX11: [0x05,0x02,0x67,0xd6,0xfd,0xd4,0x04,0x23]
 
-v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
-// GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null
+// GFX11: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 
-v_dot2_bf16_bf16 v2, v0, 0x20004000, v2
-// GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+v_dot2_bf16_bf16 v2.l, v0, 0x20004000, v2.l
+// GFX11: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
 
-v_dot2_bf16_bf16 v2, 0x20004000, v0, v2
-// GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+v_dot2_bf16_bf16 v2.l, 0x20004000, v0, v2.l
+// GFX11: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
 
-v_dot2_f16_f16 v5, v1, v2, s3
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
+v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h
+// GFX11: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
 
-v_dot2_f16_f16 v5, v255, s2, s105
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01]
+v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null
+// GFX11: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 
-v_dot2_f16_f16 v5, s1, v255, exec_hi
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01]
+v_dot2_f16_f16 v5.l, v1, v2, s3
+// GFX11: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
 
-v_dot2_f16_f16 v5, s105, s105, exec_lo
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01]
+v_dot2_f16_f16 v5.l, v255, s2, s105
+// GFX11: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01]
 
-v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04]
+v_dot2_f16_f16 v5.l, s1, v255, exec_hi
+// GFX11: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01]
 
-v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+v_dot2_f16_f16 v5.l, s105, s105, exec_lo
+// GFX11: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01]
 
-v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
-// GFX11: encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1]
+v_dot2_f16_f16 v5.l, vcc_lo, ttmp15, v3.l
+// GFX11: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04]
 
-v_dot2_f16_f16 v5, m0, 0.5, m0
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01]
+v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h
+// GFX11: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
 
-v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi
-// GFX11: encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01]
+v_dot2_f16_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1]
 
-v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo|
-// GFX11: encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1]
+v_dot2_f16_f16 v5.l, m0, 0.5, m0
+// GFX11: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01]
 
-v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b|
-// GFX11: encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+v_dot2_f16_f16 v5.l, |exec_lo|, -1, vcc_hi
+// GFX11: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01]
 
-v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc|
-// GFX11: encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3]
+v_dot2_f16_f16 v5.l, -|exec_hi|, null, -|vcc_lo|
+// GFX11: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1]
 
-v_dot2_f16_f16 v5, 0.5, -m0, 0.5 op_sel:[0,0,0,0]
-// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43]
+v_dot2_f16_f16 v5.l, null, exec_lo, -|0xfe0b|
+// GFX11: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
 
-v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
-// GFX11: encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23]
+v_dot2_f16_f16 v5.l, -1, -|exec_hi|, -|src_scc|
+// GFX11: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3]
 
-v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
-// GFX11: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+v_dot2_f16_f16 v5.l, 0.5, -m0, 0.5
+// GFX11: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43]
 
-v_dot2_f16_f16 v2, v0, 0x20004000, v2
-// GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1
+// GFX11: [0x05,0x02,0x66,0xd6,0xfd,0xd4,0x04,0x23]
 
-v_dot2_f16_f16 v2, 0x20004000, v0, v2
-// GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null
+// GFX11: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v2.l, v0, 0x20004000, v2.l
+// GFX11: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+
+v_dot2_f16_f16 v2.l, 0x20004000, v0, v2.l
+// GFX11: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+
+v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h
+// GFX11: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null
+// GFX11: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 
 v_fma_dx9_zero_f32 v5, v1, v2, s3
 // GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
index c82b61e21edf64..ba594e7406fc52 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
@@ -4700,44 +4700,117 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 ban
 v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
 // GFX11: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
 
-v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
-// GFX11: encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
+
+v_dot2_f16_f16_e64_dpp v0.l, s1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_f16_f16_e64_dpp v0.l, v1, s2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
-v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
 // GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
 
-v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+v_dot2_bf16_bf16_e64_dpp v0.l, s1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
 // GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
-v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+v_dot2_bf16_bf16_e64_dpp v0.l, v1, s2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
 // GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
-v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
-// GFX11: encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v3.l quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
 
-v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
-// GFX11: encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00]
+v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xf...
[truncated]

@broxigarchen broxigarchen requested review from Pierre-vh and removed request for Sisyph October 28, 2024 21:42
@broxigarchen
Copy link
Contributor Author

ping!

@kosarev kosarev requested a review from Sisyph October 30, 2024 11:37
@broxigarchen broxigarchen force-pushed the main-merge-true16-vop3dot-mc branch from 49e0224 to 6db0618 Compare November 4, 2024 16:48
@broxigarchen broxigarchen requested a review from kosarev November 4, 2024 16:53
@broxigarchen broxigarchen force-pushed the main-merge-true16-vop3dot-mc branch 2 times, most recently from 26dd4b9 to 1572406 Compare November 8, 2024 03:13
@broxigarchen broxigarchen requested a review from kosarev November 8, 2024 03:14
@broxigarchen broxigarchen requested a review from Sisyph November 12, 2024 17:44
Copy link
Contributor

@Sisyph Sisyph left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@broxigarchen broxigarchen force-pushed the main-merge-true16-vop3dot-mc branch from 0612e15 to c842cf0 Compare November 15, 2024 02:16
@broxigarchen
Copy link
Contributor Author

Squashed the commits to make it easier to bring up the downstream patch.

let HasSrc1Mods = 1;
let HasSrc2Mods = 1;
let AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
HasOpSel, HasOMod, IsVOP3P, HasModifiers, 1/*HasSrc0Mods*/, 1/*HasSrc1Mods*/,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: single space indentation.

@broxigarchen broxigarchen force-pushed the main-merge-true16-vop3dot-mc branch 2 times, most recently from 933c5c3 to 21f17e8 Compare December 5, 2024 17:00
@broxigarchen broxigarchen force-pushed the main-merge-true16-vop3dot-mc branch from 21f17e8 to 0b13cfa Compare December 5, 2024 17:25
@broxigarchen broxigarchen merged commit b9b46de into llvm:main Dec 9, 2024
8 checks passed
Copy link
Contributor

@mariusz-sikora-at-amd mariusz-sikora-at-amd left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a full list, but some of the tests are duplicated:

# W64-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
# W64-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]

0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is duplicated in L5472

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing this out. I will put up a patch that runs "--unique" on these files.

# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]

0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicated in L5478

# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]

0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicated in L5521

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

6 participants