diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 47b60bb0fdab3..6f3fb1e81827e 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -1345,6 +1345,30 @@ class VOP3_DOT_Profile : VOP3_Profile { let HasOMod = 0; } +class VOP3_DOT_Profile_t16 : VOP3_Profile_True16 { + let HasClamp = 0; + let HasOMod = 0; + // Override modifiers for bf16(i16) (same as float modifiers). + let HasSrc0Mods = 1; + let HasSrc1Mods = 1; + let HasSrc2Mods = 1; + let Src0ModVOP3DPP = FPVRegInputMods; + let Src1ModVOP3DPP = FP32VCSrcInputMods; + let Src2ModVOP3DPP = FPT16VCSrcInputMods; +} + +class VOP3_DOT_Profile_fake16 : VOP3_Profile_Fake16 { + let HasClamp = 0; + let HasOMod = 0; + // Override modifiers for bf16(i16) (same as float modifiers). + let HasSrc0Mods = 1; + let HasSrc1Mods = 1; + let HasSrc2Mods = 1; + let AsmVOP3Base = getAsmVOP3Base.ret; +} + let SubtargetPredicate = isGFX11Plus in { defm V_MAXMIN_F32 : VOP3Inst<"v_maxmin_f32", VOP3_Profile>; defm V_MINMAX_F32 : VOP3Inst<"v_minmax_f32", VOP3_Profile>; @@ -1409,9 +1433,15 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { defm V_MINIMUMMAXIMUM_F16 : VOP3Inst<"v_minimummaximum_f16", VOP3_Profile>; } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 -let OtherPredicates = [HasDot9Insts], IsDOT=1 in { - defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile, int_amdgcn_fdot2_f16_f16>; - defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile, int_amdgcn_fdot2_bf16_bf16>; +let SubtargetPredicate = HasDot9Insts, IsDOT=1 in { + defm V_DOT2_F16_F16 : VOP3Inst_t16_with_profiles<"v_dot2_f16_f16", VOP3_DOT_Profile, + VOP3_DOT_Profile_t16, + VOP3_DOT_Profile_fake16, + int_amdgcn_fdot2_f16_f16>; + defm V_DOT2_BF16_BF16 : VOP3Inst_t16_with_profiles<"v_dot2_bf16_bf16", VOP3_DOT_Profile, + VOP3_DOT_Profile_t16, + VOP3_DOT_Profile_fake16, + int_amdgcn_fdot2_bf16_bf16>; } class VOP_Pseudo_Scalar op, string opName, VOP3_Realtriple_with_name, VOP3_Realtriple_with_name; -multiclass VOP3Dot_Realtriple_gfx11_gfx12 op> : - VOP3Dot_Realtriple, VOP3Dot_Realtriple; +multiclass VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12 op, string asmName, string opName = NAME> { + defm _t16: VOP3Dot_Realtriple_gfx11_gfx12; + defm _fake16: VOP3Dot_Realtriple_gfx11_gfx12; +} multiclass VOP3_Realtriple_t16_gfx11_gfx12 op, string asmName, string opName = NAME, string pseudo_mnemonic = "", bit isSingle = 0> : @@ -1702,8 +1734,8 @@ defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11_gfx12<0x262>; defm V_MINMAX_U32 : VOP3_Realtriple_gfx11_gfx12<0x263>; defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11_gfx12<0x264>; defm V_MINMAX_I32 : VOP3_Realtriple_gfx11_gfx12<0x265>; -defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_gfx11_gfx12<0x266>; -defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_gfx11_gfx12<0x267>; +defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x266, "v_dot2_f16_f16">; +defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x267, "v_dot2_bf16_bf16">; defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">; defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">; defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 0e19696a32f86..5c4d96d3688b8 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -359,9 +359,12 @@ class VOP3FP8OpSel_src_bytesel_gfx11_gfx12 op, VOPProfile p> : VOP3e_gf let Inst{14-13} = byte_sel; // op_sel2/3 } -class VOP3DotOpSel_gfx11_gfx12 op, VOPProfile p> : VOP3OpSel_gfx11_gfx12{ +class VOP3DotOpSel_gfx11_gfx12 op, VOPProfile p> : + VOP3e_t16_gfx11_gfx12{ let Inst{11} = ?; let Inst{12} = ?; + let Inst{13} = !if(p.HasSrc2Mods, src2_modifiers{2}, 0); + let Inst{14} = !if(!and(p.HasDst, p.HasSrc0Mods), src0_modifiers{3}, 0); } // NB: For V_INTERP* opcodes, src0 is encoded as src1 and vice versa @@ -1706,10 +1709,12 @@ multiclass VOP3_Real_Base op, string opName = NAME, } } -multiclass VOP3Dot_Real_Base op, string opName = NAME, +multiclass VOP3Dot_Real_Base op, string asmName, string opName = NAME, bit isSingle = 0> { defvar ps = !cast(opName#"_e64"); - let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in { + let AsmString = asmName # ps.AsmOperands, + DecoderNamespace = Gen.DecoderNamespace # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"), + IsSingle = !or(isSingle, ps.Pfl.IsSingle) in { def _e64#Gen.Suffix : VOP3_Real_Gen, VOP3DotOpSel_gfx11_gfx12; @@ -1773,9 +1778,13 @@ multiclass VOP3_Real_dpp_Base op, string opName = NAME> { VOP3_DPP16_Gen; } -multiclass VOP3Dot_Real_dpp_Base op, string opName = NAME> { +multiclass VOP3Dot_Real_dpp_Base op, string asmName, string opName = NAME> { + defvar ps = !cast(opName#"_e64"#"_dpp"); def _e64_dpp#Gen.Suffix : - VOP3_DPP16_Gen(opName#"_e64"#"_dpp"), Gen> { + VOP3_DPP16_Gen_t16 { + let AsmString = asmName # ps.Pfl.AsmVOP3DPP16; + let DecoderNamespace = Gen.DecoderNamespace + # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); let Inst{11} = ?; let Inst{12} = ?; } @@ -1797,12 +1806,14 @@ multiclass VOP3_Real_dpp8_Base op, string opName = NAME> { } } -multiclass VOP3Dot_Real_dpp8_Base op, string opName = NAME> { +multiclass VOP3Dot_Real_dpp8_Base op, string asmName, string opName = NAME> { defvar ps = !cast(opName#"_e64"); - def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8 { + def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8_t16 { let Inst{11} = ?; let Inst{12} = ?; - let DecoderNamespace = Gen.DecoderNamespace; + let AsmString = asmName # ps.Pfl.AsmVOP3DPP8; + let DecoderNamespace = Gen.DecoderNamespace + # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); let AssemblerPredicate = Gen.AssemblerPredicate; } } @@ -1855,11 +1866,11 @@ multiclass VOP3_Realtriple op, bit isSingle = 0, VOP3_Real_dpp_Base, VOP3_Real_dpp8_Base; -multiclass VOP3Dot_Realtriple op, bit isSingle = 0, +multiclass VOP3Dot_Realtriple op, string asmName, bit isSingle = 0, string opName = NAME> : - VOP3Dot_Real_Base, - VOP3Dot_Real_dpp_Base, - VOP3Dot_Real_dpp8_Base; + VOP3Dot_Real_Base, + VOP3Dot_Real_dpp_Base, + VOP3Dot_Real_dpp8_Base; multiclass VOP3Only_Realtriple op> : VOP3_Realtriple; @@ -1957,6 +1968,12 @@ multiclass VOP3Only_Realtriple_with_name_gfx11_gfx12 op, string opName, VOP3Only_Realtriple_with_name, VOP3Only_Realtriple_with_name; +multiclass VOP3Dot_Realtriple_gfx11_gfx12 op, string asmName, bit isSingle = 0, + string opName = NAME> : + VOP3Dot_Realtriple, + VOP3Dot_Realtriple; + + //===----------------------------------------------------------------------===// include "VOPCInstructions.td" diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index 376e6bf968cbc..b649bab532f26 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -2018,104 +2018,116 @@ v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2 // W64: v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2 ; encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] // W32-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_dot2_bf16_bf16 v5, v1, v2, s3 -// GFX11: v_dot2_bf16_bf16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] +v_dot2_bf16_bf16 v5.l, v1, v2, s3 +// GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] -v_dot2_bf16_bf16 v5, v255, v255, s105 -// GFX11: v_dot2_bf16_bf16 v5, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] +v_dot2_bf16_bf16 v5.l, v255, v255, s105 +// GFX11: v_dot2_bf16_bf16 v5.l, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] -v_dot2_bf16_bf16 v5, s1, s2, v3 -// GFX11: v_dot2_bf16_bf16 v5, s1, s2, v3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] +v_dot2_bf16_bf16 v5.l, s1, s2, v3.l +// GFX11: v_dot2_bf16_bf16 v5.l, s1, s2, v3.l ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] -v_dot2_bf16_bf16 v5, s105, s105, m0 -// GFX11: v_dot2_bf16_bf16 v5, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] +v_dot2_bf16_bf16 v5.l, s105, s105, m0 +// GFX11: v_dot2_bf16_bf16 v5.l, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] -v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 -// GFX11: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 ; encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.l +// GFX11: v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.l ; encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] -v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi -// GFX11: v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] +v_dot2_bf16_bf16 v5.l, vcc_hi, 0xfe0b, vcc_hi +// GFX11: v_dot2_bf16_bf16 v5.l, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] -v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 -// GFX11: v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] +v_dot2_bf16_bf16 v5.l, ttmp15, src_scc, ttmp15 +// GFX11: v_dot2_bf16_bf16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] -v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo -// GFX11: v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] +v_dot2_bf16_bf16 v5.l, |m0|, -1, -vcc_lo +// GFX11: v_dot2_bf16_bf16 v5.l, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] -v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| -// GFX11: v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] +v_dot2_bf16_bf16 v5.l, -|exec_lo|, null, -|0xfe0b| +// GFX11: v_dot2_bf16_bf16 v5.l, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] -v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| -// GFX11: v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] +v_dot2_bf16_bf16 v5.l, -|exec_hi|, -|exec_lo|, -|exec_lo| +// GFX11: v_dot2_bf16_bf16 v5.l, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] -v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| -// GFX11: v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] +v_dot2_bf16_bf16 v5.l, null, -exec_hi, |src_scc| +// GFX11: v_dot2_bf16_bf16 v5.l, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] -v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| op_sel:[0,0,0,0] -// GFX11: v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] +v_dot2_bf16_bf16 v5.l, -1, -|m0|, -|exec_hi| +// GFX11: v_dot2_bf16_bf16 v5.l, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] -v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] +v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] -v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] -// GFX11: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null +// GFX11: v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 -// GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] +v_dot2_bf16_bf16 v2.l, v0, 0x20004000, v2.l +// GFX11: v_dot2_bf16_bf16 v2.l, v0, 0x20004000, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] -v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 -// GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] +v_dot2_bf16_bf16 v2.l, 0x20004000, v0, v2.l +// GFX11: v_dot2_bf16_bf16 v2.l, 0x20004000, v0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] -v_dot2_f16_f16 v5, v1, v2, s3 -// GFX11: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] +v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h +// GFX11: v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] -v_dot2_f16_f16 v5, v255, s2, s105 -// GFX11: v_dot2_f16_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] +v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1 +// GFX11: v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x67,0xd6,0xfd,0xd4,0x04,0x23] -v_dot2_f16_f16 v5, s1, v255, exec_hi -// GFX11: v_dot2_f16_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] +v_dot2_f16_f16 v5.l, v1, v2, s3 +// GFX11: v_dot2_f16_f16 v5.l, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] -v_dot2_f16_f16 v5, s105, s105, exec_lo -// GFX11: v_dot2_f16_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] +v_dot2_f16_f16 v5.l, v255, s2, s105 +// GFX11: v_dot2_f16_f16 v5.l, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] -v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] +v_dot2_f16_f16 v5.l, s1, v255, exec_hi +// GFX11: v_dot2_f16_f16 v5.l, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] -v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_dot2_f16_f16 v5.l, s105, s105, exec_lo +// GFX11: v_dot2_f16_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] -v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] +v_dot2_f16_f16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_dot2_f16_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] -v_dot2_f16_f16 v5, m0, 0.5, m0 -// GFX11: v_dot2_f16_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] +v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] +v_dot2_f16_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: v_dot2_f16_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] -v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] +v_dot2_f16_f16 v5.l, m0, 0.5, m0 +// GFX11: v_dot2_f16_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] -v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| -// GFX11: v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_dot2_f16_f16 v5.l, |exec_lo|, -1, vcc_hi +// GFX11: v_dot2_f16_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] -v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] +v_dot2_f16_f16 v5.l, -|exec_hi|, null, -|vcc_lo| +// GFX11: v_dot2_f16_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] -v_dot2_f16_f16 v5, 0.5, -m0, 0.5 op_sel:[0,0,0,0] -// GFX11: v_dot2_f16_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] +v_dot2_f16_f16 v5.l, null, exec_lo, -|0xfe0b| +// GFX11: v_dot2_f16_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] +v_dot2_f16_f16 v5.l, -1, -|exec_hi|, -|src_scc| +// GFX11: v_dot2_f16_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] -v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] -// GFX11: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_dot2_f16_f16 v5.l, 0.5, -m0, 0.5 +// GFX11: v_dot2_f16_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] -v_dot2_f16_f16 v2, v0, 0x20004000, v2 -// GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] +v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] -v_dot2_f16_f16 v2, 0x20004000, v0, v2 -// GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] +v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null +// GFX11: v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +v_dot2_f16_f16 v2.l, v0, 0x20004000, v2.l +// GFX11: v_dot2_f16_f16 v2.l, v0, 0x20004000, v2.l ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] + +v_dot2_f16_f16 v2.l, 0x20004000, v0, v2.l +// GFX11: v_dot2_f16_f16 v2.l, 0x20004000, v0, v2.l ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] + +v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1 +// GFX11: v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x66,0xd6,0xfd,0xd4,0x04,0x23] v_fma_dx9_zero_f32 v5, v1, v2, s3 // GFX11: v_fma_dx9_zero_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s index 3f812625c9773..e6f868d2b40e7 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s @@ -4617,44 +4617,128 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 ban v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX11-ERR: :[[@LINE-1]]:39: error: invalid op_sel operand +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11-ERR: :[[@LINE-1]]:43: error: invalid op_sel operand -v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX11-ERR: :[[@LINE-1]]:28: error: invalid operand for instruction +v_dot2_f16_f16_e64_dpp v0.l, s1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction + +v_dot2_f16_f16_e64_dpp v0.l, v1, s2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11-ERR: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] + +v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v3.l quad_perm:[3,2,1,0] +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h quad_perm:[0,1,2,3] +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s3 row_mirror +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s105 row_half_mirror +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, ttmp15 row_shl:1 +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_hi row_shl:15 +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_lo row_shr:1 +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, -|m0| row_shr:15 +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x66,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| row_ror:1 +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x66,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| row_ror:15 +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x66,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] -v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +v_dot2_f16_f16_e64_dpp v5.l, |v1|, -v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, |v1|, -v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x66,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v5.l, -v1, |v2|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, -v1, |v2|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x66,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +v_dot2_f16_f16_e64_dpp v5.l, -|v1|, -|v2|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, -|v1|, -|v2|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x03,0x66,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13] + +v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11-ERR: :[[@LINE-1]]:45: error: invalid op_sel operand + +v_dot2_bf16_bf16_e64_dpp v0.l, s1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX11-ERR: :[[@LINE-1]]:32: error: invalid operand for instruction -v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +v_dot2_bf16_bf16_e64_dpp v0.l, v1, s2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX11-ERR: :[[@LINE-1]]:36: error: invalid operand for instruction -v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX11: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// GFX11: v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] +v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] -v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x02,0x02,0x01,0x1b,0x00,0xff] -v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX11-ERR: :[[@LINE-1]]:41: error: invalid op_sel operand +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v3.l quad_perm:[3,2,1,0] +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX11-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v3.l quad_perm:[0,1,2,3] +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX11-ERR: :[[@LINE-1]]:34: error: invalid operand for instruction +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h row_mirror +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s3 row_half_mirror +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s105 row_shl:1 +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, ttmp15 row_shl:15 +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, vcc_hi row_shr:1 +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v5.l, |v1|, v2, -vcc_lo row_shr:15 +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, |v1|, v2, -vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x67,0xd6,0xfa,0x04,0xaa,0x81,0x01,0x1f,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v5.l, v1, -v2, |m0| row_ror:1 +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, -v2, |m0| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x67,0xd6,0xfa,0x04,0xf6,0x41,0x01,0x21,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| row_ror:15 +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x67,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x2f,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x67,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x50,0x01,0xff] -v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +v_dot2_bf16_bf16_e64_dpp v5.l, -v1, |v2|, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, -v1, |v2|, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x67,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x5f,0x01,0x01] -v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX11: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, -|v2|, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, -|v2|, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x03,0x67,0xd6,0xfa,0x04,0x06,0x63,0x01,0x60,0x09,0x13] -v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// GFX11: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x02,0x02,0x01,0x1b,0x00,0xff] +v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index ff7b114b128cf..160863b19012d 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -2974,44 +2974,125 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] -// GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX11: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] -v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4] -// GFX11-ERR: :[[@LINE-1]]:39: error: invalid op_sel operand +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:43: error: invalid op_sel operand -v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] -// GFX11-ERR: :[[@LINE-1]]:28: error: invalid operand for instruction +v_dot2_f16_f16_e64_dpp v0.l, s1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction + +v_dot2_f16_f16_e64_dpp v0.l, v1, s2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX11: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] + +v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX11: v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, -|m0| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x66,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] + +v_dot2_f16_f16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x66,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] + +v_dot2_f16_f16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x66,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] -v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4] +v_dot2_f16_f16_e64_dpp v5.l, |v1|, -v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, |v1|, -v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x66,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] + +v_dot2_f16_f16_e64_dpp v5.l, -v1, |v2|, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, -v1, |v2|, -1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x66,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +v_dot2_f16_f16_e64_dpp v5.l, -|v1|, -|v2|, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_dot2_f16_f16_e64_dpp v5.l, -|v1|, -|v2|, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x03,0x66,0xd6,0xea,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] + +v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x47,0x66,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX11: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:45: error: invalid op_sel operand + +v_dot2_bf16_bf16_e64_dpp v0.l, s1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] // GFX11-ERR: :[[@LINE-1]]:32: error: invalid operand for instruction -v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] -// GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +v_dot2_bf16_bf16_e64_dpp v0.l, v1, s2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:36: error: invalid operand for instruction -v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] -// GFX11: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] +v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX11: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] -v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] +v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX11: v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] -v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] -// GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, 0 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, 0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x02,0x02,0x01,0x77,0x39,0x05] -v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4] -// GFX11-ERR: :[[@LINE-1]]:41: error: invalid op_sel operand +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] -// GFX11-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4] -// GFX11-ERR: :[[@LINE-1]]:34: error: invalid operand for instruction +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] + +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_dot2_bf16_bf16_e64_dpp v5.l, |v1|, v2, -vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, |v1|, v2, -vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x67,0xd6,0xe9,0x04,0xaa,0x81,0x01,0x77,0x39,0x05] + +v_dot2_bf16_bf16_e64_dpp v5.l, v1, -v2, |m0| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, -v2, |m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x67,0xd6,0xe9,0x04,0xf6,0x41,0x01,0x77,0x39,0x05] + +v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x67,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] + +v_dot2_bf16_bf16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x67,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] -v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] -// GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +v_dot2_bf16_bf16_e64_dpp v5.l, -v1, |v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, -v1, |v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x67,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x77,0x39,0x05] -v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] -// GFX11: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] +v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, -|v2|, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, -|v2|, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x03,0x67,0xd6,0xea,0x04,0x06,0x63,0x01,0x77,0x39,0x05] -v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x02,0x02,0x01,0x77,0x39,0x05] +v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x47,0x67,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s index 69ed1af22b459..c7cd88e81583f 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s @@ -2033,92 +2033,116 @@ v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2 // W64: v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2 ; encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] // W32-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_dot2_bf16_bf16 v5, v1, v2, s3 -// GFX12: v_dot2_bf16_bf16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] +v_dot2_bf16_bf16 v5.l, v1, v2, s3 +// GFX12: v_dot2_bf16_bf16 v5.l, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] -v_dot2_bf16_bf16 v5, v255, v255, s105 -// GFX12: v_dot2_bf16_bf16 v5, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] +v_dot2_bf16_bf16 v5.l, v255, v255, s105 +// GFX12: v_dot2_bf16_bf16 v5.l, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] -v_dot2_bf16_bf16 v5, s1, s2, v3 -// GFX12: v_dot2_bf16_bf16 v5, s1, s2, v3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] +v_dot2_bf16_bf16 v5.l, s1, s2, v3.l +// GFX12: v_dot2_bf16_bf16 v5.l, s1, s2, v3.l ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] -v_dot2_bf16_bf16 v5, s105, s105, m0 -// GFX12: v_dot2_bf16_bf16 v5, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] +v_dot2_bf16_bf16 v5.l, s105, s105, m0 +// GFX12: v_dot2_bf16_bf16 v5.l, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] -v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 -// GFX12: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 ; encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.l +// GFX12: v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.l ; encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] -v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi -// GFX12: v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] +v_dot2_bf16_bf16 v5.l, vcc_hi, 0xfe0b, vcc_hi +// GFX12: v_dot2_bf16_bf16 v5.l, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] -v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 -// GFX12: v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] +v_dot2_bf16_bf16 v5.l, ttmp15, src_scc, ttmp15 +// GFX12: v_dot2_bf16_bf16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] -v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo -// GFX12: v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] +v_dot2_bf16_bf16 v5.l, |m0|, -1, -vcc_lo +// GFX12: v_dot2_bf16_bf16 v5.l, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] -v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| -// GFX12: v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] +v_dot2_bf16_bf16 v5.l, -|exec_lo|, null, -|0xfe0b| +// GFX12: v_dot2_bf16_bf16 v5.l, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] -v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| -// GFX12: v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] +v_dot2_bf16_bf16 v5.l, -|exec_hi|, -|exec_lo|, -|exec_lo| +// GFX12: v_dot2_bf16_bf16 v5.l, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] -v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| -// GFX12: v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] +v_dot2_bf16_bf16 v5.l, null, -exec_hi, |src_scc| +// GFX12: v_dot2_bf16_bf16 v5.l, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] -v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| op_sel:[0,0,0,0] -// GFX12: v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] +v_dot2_bf16_bf16 v5.l, -1, -|m0|, -|exec_hi| +// GFX12: v_dot2_bf16_bf16 v5.l, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] -v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX12: v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] +v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX12: v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] -v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] -// GFX12: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null +// GFX12: v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_dot2_f16_f16 v5, v1, v2, s3 -// GFX12: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] +v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h +// GFX12: v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] -v_dot2_f16_f16 v5, v255, s2, s105 -// GFX12: v_dot2_f16_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] +v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1 +// GFX12: v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x67,0xd6,0xfd,0xd4,0x04,0x23] -v_dot2_f16_f16 v5, s1, v255, exec_hi -// GFX12: v_dot2_f16_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] +v_dot2_bf16_bf16 v2.l, v0, 0x20004000, v2.l +// GFX12: v_dot2_bf16_bf16 v2.l, v0, 0x20004000, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] -v_dot2_f16_f16 v5, s105, s105, exec_lo -// GFX12: v_dot2_f16_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] +v_dot2_bf16_bf16 v2.l, 0x20004000, v0, v2.l +// GFX12: v_dot2_bf16_bf16 v2.l, 0x20004000, v0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] -v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 -// GFX12: v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] +v_dot2_f16_f16 v5.l, v1, v2, s3 +// GFX12: v_dot2_f16_f16 v5.l, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] -v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX12: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_dot2_f16_f16 v5.l, v255, s2, s105 +// GFX12: v_dot2_f16_f16 v5.l, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] -v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX12: v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] +v_dot2_f16_f16 v5.l, s1, v255, exec_hi +// GFX12: v_dot2_f16_f16 v5.l, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] -v_dot2_f16_f16 v5, m0, 0.5, m0 -// GFX12: v_dot2_f16_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] +v_dot2_f16_f16 v5.l, s105, s105, exec_lo +// GFX12: v_dot2_f16_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] -v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi -// GFX12: v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] +v_dot2_f16_f16 v5.l, vcc_lo, ttmp15, v3.l +// GFX12: v_dot2_f16_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] -v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| -// GFX12: v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] +v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX12: v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| -// GFX12: v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_dot2_f16_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: v_dot2_f16_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] -v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| -// GFX12: v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] +v_dot2_f16_f16 v5.l, m0, 0.5, m0 +// GFX12: v_dot2_f16_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] -v_dot2_f16_f16 v5, 0.5, -m0, 0.5 op_sel:[0,0,0,0] -// GFX12: v_dot2_f16_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] +v_dot2_f16_f16 v5.l, |exec_lo|, -1, vcc_hi +// GFX12: v_dot2_f16_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] -v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX12: v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] +v_dot2_f16_f16 v5.l, -|exec_hi|, null, -|vcc_lo| +// GFX12: v_dot2_f16_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] -v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] -// GFX12: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_dot2_f16_f16 v5.l, null, exec_lo, -|0xfe0b| +// GFX12: v_dot2_f16_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +v_dot2_f16_f16 v5.l, -1, -|exec_hi|, -|src_scc| +// GFX12: v_dot2_f16_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_dot2_f16_f16 v5.l, 0.5, -m0, 0.5 +// GFX12: v_dot2_f16_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] + +v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX12: v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] + +v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null +// GFX12: v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX12: v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1 +// GFX12: v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x66,0xd6,0xfd,0xd4,0x04,0x23] + +v_dot2_f16_f16 v2.l, v0, 0x20004000, v2.l +// GFX12: v_dot2_f16_f16 v2.l, v0, 0x20004000, v2.l ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] + +v_dot2_f16_f16 v2.l, 0x20004000, v0, v2.l +// GFX12: v_dot2_f16_f16 v2.l, 0x20004000, v0, v2.l ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] v_fma_dx9_zero_f32 v5, v1, v2, s3 // GFX12: v_fma_dx9_zero_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index 6b7b2ac4d3cc5..894acc5e94e1d 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -5239,47 +5239,59 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 ban v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12-ERR: :[[@LINE-1]]:39: error: invalid op_sel operand +v_dot2_f16_f16_e64_dpp v0.l, s1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction -v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12-ERR: :[[@LINE-1]]:28: error: invalid operand for instruction +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12-ERR: :[[@LINE-1]]:43: error: invalid op_sel operand -v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12: v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0c,0x04,0x01,0xe4,0x04,0x00] +v_dot2_f16_f16_e64_dpp v0.l, v1, s2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, s2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0c,0x04,0x01,0xe4,0x04,0x00] -v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12: v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] -v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// GFX12: v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12-ERR: :[[@LINE-1]]:41: error: invalid op_sel operand +v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] -v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX12-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] + +v_dot2_bf16_bf16_e64_dpp v0.l, s1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX12-ERR: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12-ERR: :[[@LINE-1]]:45: error: invalid op_sel operand + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, s2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, s2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0c,0x04,0x01,0xe4,0x00,0x00] + +v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0c,0x04,0x01,0xe4,0x00,0x00] +v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] -v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x02,0x02,0x01,0x1b,0x00,0xff] -v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h row_mirror +// GFX12: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// GFX12: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x02,0x02,0x01,0x1b,0x00,0xff] +v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] // GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s index 4ea57c4b74ae9..a5bfec80d8039 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s @@ -3508,47 +3508,59 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] -// GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] -v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4] -// GFX12-ERR: :[[@LINE-1]]:39: error: invalid op_sel operand +v_dot2_f16_f16_e64_dpp v0.l, s1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX12-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction -v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] -// GFX12-ERR: :[[@LINE-1]]:28: error: invalid operand for instruction +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4] +// GFX12-ERR: :[[@LINE-1]]:43: error: invalid op_sel operand -v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4] -// GFX12: v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x88,0x46,0x92] +v_dot2_f16_f16_e64_dpp v0.l, v1, s2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, s2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x88,0x46,0x92] -v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] -// GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX12: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] -v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] -// GFX12: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] +v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX12: v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] -v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] -v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] -// GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4] -// GFX12-ERR: :[[@LINE-1]]:41: error: invalid op_sel operand +v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x47,0x66,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] -// GFX12-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] + +v_dot2_bf16_bf16_e64_dpp v0.l, s1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX12-ERR: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4] +// GFX12-ERR: :[[@LINE-1]]:45: error: invalid op_sel operand + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, s2, v3.l dpp8:[0,1,2,3,4,4,4,4] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, s2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x88,0x46,0x92] + +v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] -v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4] -// GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x88,0x46,0x92] +v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] -v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] -// GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, 0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, 0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x02,0x02,0x01,0x77,0x39,0x05] -v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] -// GFX12: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] +v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x02,0x02,0x01,0x77,0x39,0x05] +v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x47,0x67,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] v_minimum_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_minimum_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x65,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt index c580c19e1bcd8..60d213f1ff937 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt @@ -1985,103 +1985,226 @@ # W64: v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2 ; encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] 0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_dot2_bf16_bf16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_dot2_bf16_bf16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_dot2_bf16_bf16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01 -# GFX11: v_dot2_bf16_bf16 v5, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] +# W32-FAKE16: v_dot2_bf16_bf16 v5, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] +# W64-FAKE16: v_dot2_bf16_bf16 v5, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] 0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04 -# GFX11: v_dot2_bf16_bf16 v5, s1, s2, v3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, s1, s2, v3.l ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] +# W32-FAKE16: v_dot2_bf16_bf16 v5, s1, s2, v3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, s1, s2, v3.l ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] +# W64-FAKE16: v_dot2_bf16_bf16 v5, s1, s2, v3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] 0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01 -# GFX11: v_dot2_bf16_bf16 v5, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] +# W32-FAKE16: v_dot2_bf16_bf16 v5, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] +# W64-FAKE16: v_dot2_bf16_bf16 v5, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] -0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07 -# GFX11: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 ; encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07 +# W32-REAL16: v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W32-FAKE16: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W64-FAKE16: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] 0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81 -# GFX11: v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] +# W32-FAKE16: v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] +# W64-FAKE16: v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] 0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00 -# GFX11: v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] 0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1 -# GFX11: v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] +# W32-FAKE16: v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] +# W64-FAKE16: v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] 0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43 -# GFX11: v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] +# W32-FAKE16: v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] +# W64-FAKE16: v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] 0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1 -# GFX11: v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] +# W32-FAKE16: v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] +# W64-FAKE16: v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] -0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] +0x05,0x02,0x67,0xd6,0xfd,0xd4,0x04,0x23 +# W32-REAL16: v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x67,0xd6,0xfd,0xd4,0x04,0x23] +# W32-FAKE16: v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x67,0xd6,0xfd,0xd4,0x04,0x23] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x67,0xd6,0xfd,0xd4,0x04,0x23] +# W64-FAKE16: v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x67,0xd6,0xfd,0xd4,0x04,0x23] 0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20 -# GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] +# W32-REAL16: v_dot2_bf16_bf16 v2.l, v0, 0x20004000, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] +# W32-FAKE16: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] +# W64-REAL16: v_dot2_bf16_bf16 v2.l, v0, 0x20004000, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] +# W64-FAKE16: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] 0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20 -# GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] +# W32-REAL16: v_dot2_bf16_bf16 v2.l, 0x20004000, v0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] +# W32-FAKE16: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] +# W64-REAL16: v_dot2_bf16_bf16 v2.l, 0x20004000, v0, v2.l ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] +# W64-FAKE16: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] + +0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07 +# W32-REAL16: v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W32-FAKE16: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W64-FAKE16: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] + +0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_dot2_f16_f16 v5.l, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_dot2_f16_f16 v5.l, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_dot2_f16_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_dot2_f16_f16 v5.l, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_dot2_f16_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_dot2_f16_f16 v5.l, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_dot2_f16_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_dot2_f16_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_dot2_f16_f16 v5.l, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_dot2_f16_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_dot2_f16_f16 v5.l, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_dot2_f16_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_dot2_f16_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_dot2_f16_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_dot2_f16_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_dot2_f16_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_dot2_f16_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_dot2_f16_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_dot2_f16_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-REAL16: v_dot2_f16_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-FAKE16: v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-REAL16: v_dot2_f16_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-FAKE16: v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] 0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_dot2_f16_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-REAL16: v_dot2_f16_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-FAKE16: v_dot2_f16_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-REAL16: v_dot2_f16_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-FAKE16: v_dot2_f16_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] 0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_dot2_f16_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_dot2_f16_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-REAL16: v_dot2_f16_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-FAKE16: v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-REAL16: v_dot2_f16_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-FAKE16: v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] 0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_dot2_f16_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_f16_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] 0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-REAL16: v_dot2_f16_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-FAKE16: v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-REAL16: v_dot2_f16_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-FAKE16: v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] 0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX11: v_dot2_f16_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-REAL16: v_dot2_f16_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-FAKE16: v_dot2_f16_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-REAL16: v_dot2_f16_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-FAKE16: v_dot2_f16_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] -0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] +0x05,0x02,0x66,0xd6,0xfd,0xd4,0x04,0x23 +# W32-REAL16: v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x66,0xd6,0xfd,0xd4,0x04,0x23] +# W32-FAKE16: v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x66,0xd6,0xfd,0xd4,0x04,0x23] +# W64-REAL16: v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x66,0xd6,0xfd,0xd4,0x04,0x23] +# W64-FAKE16: v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x66,0xd6,0xfd,0xd4,0x04,0x23] 0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20 -# GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] +# W32-REAL16: v_dot2_f16_f16 v2.l, v0, 0x20004000, v2.l ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] +# W32-FAKE16: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] +# W64-REAL16: v_dot2_f16_f16 v2.l, v0, 0x20004000, v2.l ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] +# W64-FAKE16: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] 0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20 -# GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] +# W32-REAL16: v_dot2_f16_f16 v2.l, 0x20004000, v0, v2.l ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] +# W32-FAKE16: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] +# W64-REAL16: v_dot2_f16_f16 v2.l, 0x20004000, v0, v2.l ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] +# W64-FAKE16: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] + +0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_fma_dx9_zero_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt index 7942d79db4fa6..35ad673de75db 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt @@ -5012,28 +5012,195 @@ # W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] # W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +0x05,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x66,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] + +0x05,0x00,0x66,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x66,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x66,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x04,0x66,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x66,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x66,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x66,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x66,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x05,0x66,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x66,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x66,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x66,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x66,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] + +0x05,0x06,0x66,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x66,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x66,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x66,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x66,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] + +0x05,0x01,0x66,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, |v1|, -v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x66,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, |v1|, -v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x66,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, |v1|, -v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x66,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, |v1|, -v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x66,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] + +0x05,0x02,0x66,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, -v1, |v2|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x66,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, -v1, |v2|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x66,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, -v1, |v2|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x66,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, -v1, |v2|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x66,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x03,0x66,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, -|v1|, -|v2|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x03,0x66,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x03,0x66,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, -|v1|, -|v2|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x03,0x66,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x03,0x66,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] + +0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x67,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x67,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x67,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x67,0xd6,0xfa,0x04,0xaa,0x81,0x01,0x1f,0x01,0xff +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, |v1|, v2, -vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x67,0xd6,0xfa,0x04,0xaa,0x81,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, |v1|, v2, -vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x67,0xd6,0xfa,0x04,0xaa,0x81,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, |v1|, v2, -vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x67,0xd6,0xfa,0x04,0xaa,0x81,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, |v1|, v2, -vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x67,0xd6,0xfa,0x04,0xaa,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x04,0x67,0xd6,0xfa,0x04,0xf6,0x41,0x01,0x21,0x01,0xff +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, -v2, |m0| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x67,0xd6,0xfa,0x04,0xf6,0x41,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, -v2, |m0| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x67,0xd6,0xfa,0x04,0xf6,0x41,0x01,0x21,0x01,0xff] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, -v2, |m0| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x67,0xd6,0xfa,0x04,0xf6,0x41,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, -v2, |m0| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x67,0xd6,0xfa,0x04,0xf6,0x41,0x01,0x21,0x01,0xff] + +0x05,0x05,0x67,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x2f,0x01,0xff +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x67,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, -|v1|, v2, -|exec_hi| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x67,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x67,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, -|v1|, v2, -|exec_hi| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x67,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x2f,0x01,0xff] + +0x05,0x06,0x67,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x50,0x01,0xff +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x67,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, -|v2|, -|exec_lo| row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x67,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x50,0x01,0xff] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x67,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, -|v2|, -|exec_lo| row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x67,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x50,0x01,0xff] + +0x05,0x02,0x67,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, -v1, |v2|, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x67,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, -v1, |v2|, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x67,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, -v1, |v2|, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x67,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, -v1, |v2|, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x67,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x5f,0x01,0x01] + +0x05,0x03,0x67,0xd6,0xfa,0x04,0x06,0x63,0x01,0x60,0x01,0x13 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, -|v2|, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x03,0x67,0xd6,0xfa,0x04,0x06,0x63,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, -|v1|, -|v2|, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x03,0x67,0xd6,0xfa,0x04,0x06,0x63,0x01,0x60,0x01,0x13] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, -|v2|, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x03,0x67,0xd6,0xfa,0x04,0x06,0x63,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, -|v1|, -|v2|, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x03,0x67,0xd6,0xfa,0x04,0x06,0x63,0x01,0x60,0x01,0x13] + +0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# op_sel[1:0] are ignored -0x00,0x78,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] - -0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] - -0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00 -# GFX11: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] - -0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] - -# op_sel[1:0] are ignored -0x00,0x78,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] - -0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] - -0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00 -# GFX11: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt index 1991817c9aa90..3a37a19d6d3af 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt @@ -3026,28 +3026,189 @@ # W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] # W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92 -# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +0x05,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] + +0x05,0x00,0x66,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x66,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x66,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x66,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +0x05,0x04,0x66,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x66,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x66,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x66,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x66,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] + +0x05,0x05,0x66,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x66,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x66,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x66,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x66,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] + +0x05,0x06,0x66,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x66,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x66,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x66,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x66,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] + +0x05,0x01,0x66,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, |v1|, -v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x66,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, |v1|, -v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x66,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, |v1|, -v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x66,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, |v1|, -v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x66,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] + +0x05,0x02,0x66,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, -v1, |v2|, -1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x66,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, -v1, |v2|, -1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x66,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, -v1, |v2|, -1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x66,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, -v1, |v2|, -1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x66,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +0x05,0x03,0x66,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, -|v1|, -|v2|, 0.5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x66,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x66,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, -|v1|, -|v2|, 0.5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x66,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x66,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] + +0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] + +0x05,0x00,0x67,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x67,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x67,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +0x05,0x01,0x67,0xd6,0xe9,0x04,0xaa,0x81,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, |v1|, v2, -vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x67,0xd6,0xe9,0x04,0xaa,0x81,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, |v1|, v2, -vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x67,0xd6,0xe9,0x04,0xaa,0x81,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, |v1|, v2, -vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x67,0xd6,0xe9,0x04,0xaa,0x81,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, |v1|, v2, -vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x67,0xd6,0xe9,0x04,0xaa,0x81,0x01,0x77,0x39,0x05] + +0x05,0x04,0x67,0xd6,0xe9,0x04,0xf6,0x41,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, -v2, |m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x67,0xd6,0xe9,0x04,0xf6,0x41,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, -v2, |m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x67,0xd6,0xe9,0x04,0xf6,0x41,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, -v2, |m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x67,0xd6,0xe9,0x04,0xf6,0x41,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, -v2, |m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x67,0xd6,0xe9,0x04,0xf6,0x41,0x01,0x77,0x39,0x05] + +0x05,0x05,0x67,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x67,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x67,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x67,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x67,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] + +0x05,0x06,0x67,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x67,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x67,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x67,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x67,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] + +0x05,0x02,0x67,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, -v1, |v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x67,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, -v1, |v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x67,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, -v1, |v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x67,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, -v1, |v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x67,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x77,0x39,0x05] + +0x05,0x03,0x67,0xd6,0xe9,0x04,0x06,0x63,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, -|v2|, -1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x67,0xd6,0xe9,0x04,0x06,0x63,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, -|v1|, -|v2|, -1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x67,0xd6,0xe9,0x04,0x06,0x63,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, -|v1|, -|v2|, -1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x67,0xd6,0xe9,0x04,0x06,0x63,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, -|v1|, -|v2|, -1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x67,0xd6,0xe9,0x04,0x06,0x63,0x01,0x77,0x39,0x05] + +0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -# op_sel[1:0] are ignored -0x00,0x78,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92 -# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] - -0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92 -# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] - -0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92 -# GFX11: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] - -0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92 -# GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] - -# op_sel[1:0] are ignored -0x00,0x78,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92 -# GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] - -0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92 -# GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] - -0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92 -# GFX11: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt index 232b4036c00dc..c3bab320b0ba2 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt @@ -1999,91 +1999,214 @@ # W64: v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2 ; encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] 0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00 -# GFX12: v_dot2_bf16_bf16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_dot2_bf16_bf16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_dot2_bf16_bf16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01 -# GFX12: v_dot2_bf16_bf16 v5, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] +# W32-FAKE16: v_dot2_bf16_bf16 v5, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] +# W64-FAKE16: v_dot2_bf16_bf16 v5, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] 0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04 -# GFX12: v_dot2_bf16_bf16 v5, s1, s2, v3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, s1, s2, v3.l ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] +# W32-FAKE16: v_dot2_bf16_bf16 v5, s1, s2, v3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, s1, s2, v3.l ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] +# W64-FAKE16: v_dot2_bf16_bf16 v5, s1, s2, v3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] 0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01 -# GFX12: v_dot2_bf16_bf16 v5, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] +# W32-FAKE16: v_dot2_bf16_bf16 v5, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] +# W64-FAKE16: v_dot2_bf16_bf16 v5, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] 0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07 -# GFX12: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 ; encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.l ; encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W32-FAKE16: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 ; encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.l ; encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W64-FAKE16: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 ; encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] 0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00 -# GFX12: v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01 -# GFX12: v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81 -# GFX12: v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] +# W32-FAKE16: v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] +# W64-FAKE16: v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] 0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00 -# GFX12: v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] 0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1 -# GFX12: v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] +# W32-FAKE16: v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] +# W64-FAKE16: v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] 0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43 -# GFX12: v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] +# W32-FAKE16: v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] +# W64-FAKE16: v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] 0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1 -# GFX12: v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] +# W32-FAKE16: v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] +# W64-FAKE16: v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] 0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23 -# GFX12: v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] +# W32-REAL16: v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] +# W32-FAKE16: v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] +# W64-FAKE16: v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] 0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX12: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07 +# W32-REAL16: v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W32-FAKE16: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W64-FAKE16: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] + +0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07 +# W32-REAL16: v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W32-FAKE16: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W64-REAL16: v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +# W64-FAKE16: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07] + +0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00 -# GFX12: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_dot2_f16_f16 v5.l, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_dot2_f16_f16 v5.l, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01 -# GFX12: v_dot2_f16_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_dot2_f16_f16 v5.l, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_dot2_f16_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_dot2_f16_f16 v5.l, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_dot2_f16_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01 -# GFX12: v_dot2_f16_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_dot2_f16_f16 v5.l, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_dot2_f16_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_dot2_f16_f16 v5.l, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_dot2_f16_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01 -# GFX12: v_dot2_f16_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_dot2_f16_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_dot2_f16_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_dot2_f16_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_dot2_f16_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX12: v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_dot2_f16_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_dot2_f16_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX12: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX12: v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-REAL16: v_dot2_f16_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-FAKE16: v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-REAL16: v_dot2_f16_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-FAKE16: v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] 0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX12: v_dot2_f16_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-REAL16: v_dot2_f16_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-FAKE16: v_dot2_f16_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-REAL16: v_dot2_f16_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-FAKE16: v_dot2_f16_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] 0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01 -# GFX12: v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_dot2_f16_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_dot2_f16_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX12: v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-REAL16: v_dot2_f16_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-FAKE16: v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-REAL16: v_dot2_f16_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-FAKE16: v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] 0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX12: v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_dot2_f16_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_f16_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] 0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX12: v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-REAL16: v_dot2_f16_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-FAKE16: v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-REAL16: v_dot2_f16_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-FAKE16: v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] 0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX12: v_dot2_f16_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-REAL16: v_dot2_f16_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-FAKE16: v_dot2_f16_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-REAL16: v_dot2_f16_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-FAKE16: v_dot2_f16_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] 0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23 -# GFX12: v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] +# W32-REAL16: v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] +# W32-FAKE16: v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] +# W64-REAL16: v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] +# W64-FAKE16: v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] + +0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX12: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00 # GFX12: v_fma_dx9_zero_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt index efac4b7aa12f7..232ed8d23c9c6 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt @@ -5445,30 +5445,102 @@ # W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] # op_sel[1:0] are ignored 0x00,0x78,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] 0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] 0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00 -# GFX12: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] + +0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] # op_sel[1:0] are ignored 0x00,0x78,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] 0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] 0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00 -# GFX12: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] + +0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0xff,0x03,0x65,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 # GFX12: v_minimum_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x65,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt index 0be2725f4085d..469b199053d47 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt @@ -3396,30 +3396,102 @@ # W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92 -# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] # op_sel[1:0] are ignored 0x00,0x78,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92 -# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] 0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92 -# GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] 0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92 -# GFX12: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] +# W32-REAL16: v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] + +0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x66,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] 0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92 -# GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] # op_sel[1:0] are ignored 0x00,0x78,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92 -# GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] 0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92 -# GFX12: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] 0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92 -# GFX12: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] + +0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x67,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] 0xff,0x03,0x66,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 # GFX12: v_maximum_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x66,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]