Skip to content

Commit b9b46de

Browse files
authored
[AMDGPU][MC][True16] VOP3dot instruction update for true16/fake16 (#113474)
Update the VOP3dot instructions with true16 and fake16 formats. This patch covers the following instructions: v_dot2_f16_f16 and v_dot2_bf16_bf16.
1 parent 392622d commit b9b46de

14 files changed

+1375
-383
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 39 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1345,6 +1345,30 @@ class VOP3_DOT_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> {
13451345
let HasOMod = 0;
13461346
}
13471347

1348+
class VOP3_DOT_Profile_t16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile_True16<P, Features> {
1349+
let HasClamp = 0;
1350+
let HasOMod = 0;
1351+
// Override modifiers for bf16(i16) (same as float modifiers).
1352+
let HasSrc0Mods = 1;
1353+
let HasSrc1Mods = 1;
1354+
let HasSrc2Mods = 1;
1355+
let Src0ModVOP3DPP = FPVRegInputMods;
1356+
let Src1ModVOP3DPP = FP32VCSrcInputMods;
1357+
let Src2ModVOP3DPP = FPT16VCSrcInputMods</*IsFake16*/0>;
1358+
}
1359+
1360+
class VOP3_DOT_Profile_fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile_Fake16<P, Features> {
1361+
let HasClamp = 0;
1362+
let HasOMod = 0;
1363+
// Override modifiers for bf16(i16) (same as float modifiers).
1364+
let HasSrc0Mods = 1;
1365+
let HasSrc1Mods = 1;
1366+
let HasSrc2Mods = 1;
1367+
let AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
1368+
HasOpSel, HasOMod, IsVOP3P, HasModifiers, 1/*HasSrc0Mods*/, 1/*HasSrc1Mods*/,
1369+
1/*HasSrc2Mods*/, DstVT>.ret;
1370+
}
1371+
13481372
let SubtargetPredicate = isGFX11Plus in {
13491373
defm V_MAXMIN_F32 : VOP3Inst<"v_maxmin_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
13501374
defm V_MINMAX_F32 : VOP3Inst<"v_minmax_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
@@ -1409,9 +1433,15 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
14091433
defm V_MINIMUMMAXIMUM_F16 : VOP3Inst<"v_minimummaximum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
14101434
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
14111435

1412-
let OtherPredicates = [HasDot9Insts], IsDOT=1 in {
1413-
defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>, int_amdgcn_fdot2_f16_f16>;
1414-
defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_BF16_V2BF16_V2BF16_BF16>, int_amdgcn_fdot2_bf16_bf16>;
1436+
let SubtargetPredicate = HasDot9Insts, IsDOT=1 in {
1437+
defm V_DOT2_F16_F16 : VOP3Inst_t16_with_profiles<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>,
1438+
VOP3_DOT_Profile_t16<VOP_F16_V2F16_V2F16_F16>,
1439+
VOP3_DOT_Profile_fake16<VOP_F16_V2F16_V2F16_F16>,
1440+
int_amdgcn_fdot2_f16_f16>;
1441+
defm V_DOT2_BF16_BF16 : VOP3Inst_t16_with_profiles<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_BF16_V2BF16_V2BF16_BF16>,
1442+
VOP3_DOT_Profile_t16<VOP_BF16_V2BF16_V2BF16_BF16>,
1443+
VOP3_DOT_Profile_fake16<VOP_BF16_V2BF16_V2BF16_BF16>,
1444+
int_amdgcn_fdot2_bf16_bf16>;
14151445
}
14161446

14171447
class VOP_Pseudo_Scalar<RegisterClass Dst, RegisterOperand SrcOp,
@@ -1609,8 +1639,10 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
16091639
VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
16101640
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
16111641

1612-
multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
1613-
VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
1642+
multiclass VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME> {
1643+
defm _t16: VOP3Dot_Realtriple_gfx11_gfx12<op, asmName, 0, opName#"_t16">;
1644+
defm _fake16: VOP3Dot_Realtriple_gfx11_gfx12<op, asmName, 0, opName#"_fake16">;
1645+
}
16141646

16151647
multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
16161648
string pseudo_mnemonic = "", bit isSingle = 0> :
@@ -1702,8 +1734,8 @@ defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11_gfx12<0x262>;
17021734
defm V_MINMAX_U32 : VOP3_Realtriple_gfx11_gfx12<0x263>;
17031735
defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11_gfx12<0x264>;
17041736
defm V_MINMAX_I32 : VOP3_Realtriple_gfx11_gfx12<0x265>;
1705-
defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_gfx11_gfx12<0x266>;
1706-
defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_gfx11_gfx12<0x267>;
1737+
defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x266, "v_dot2_f16_f16">;
1738+
defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x267, "v_dot2_bf16_bf16">;
17071739
defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
17081740
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
17091741
defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -359,9 +359,12 @@ class VOP3FP8OpSel_src_bytesel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3e_gf
359359
let Inst{14-13} = byte_sel; // op_sel2/3
360360
}
361361

362-
class VOP3DotOpSel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3OpSel_gfx11_gfx12<op, p>{
362+
class VOP3DotOpSel_gfx11_gfx12<bits<10> op, VOPProfile p> :
363+
VOP3e_t16_gfx11_gfx12<op, p>{
363364
let Inst{11} = ?;
364365
let Inst{12} = ?;
366+
let Inst{13} = !if(p.HasSrc2Mods, src2_modifiers{2}, 0);
367+
let Inst{14} = !if(!and(p.HasDst, p.HasSrc0Mods), src0_modifiers{3}, 0);
365368
}
366369

367370
// NB: For V_INTERP* opcodes, src0 is encoded as src1 and vice versa
@@ -1706,10 +1709,12 @@ multiclass VOP3_Real_Base<GFXGen Gen, bits<10> op, string opName = NAME,
17061709
}
17071710
}
17081711

1709-
multiclass VOP3Dot_Real_Base<GFXGen Gen, bits<10> op, string opName = NAME,
1712+
multiclass VOP3Dot_Real_Base<GFXGen Gen, bits<10> op, string asmName, string opName = NAME,
17101713
bit isSingle = 0> {
17111714
defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
1712-
let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
1715+
let AsmString = asmName # ps.AsmOperands,
1716+
DecoderNamespace = Gen.DecoderNamespace # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"),
1717+
IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
17131718
def _e64#Gen.Suffix :
17141719
VOP3_Real_Gen<ps, Gen>,
17151720
VOP3DotOpSel_gfx11_gfx12<op, ps.Pfl>;
@@ -1773,9 +1778,13 @@ multiclass VOP3_Real_dpp_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
17731778
VOP3_DPP16_Gen<op, ps, Gen>;
17741779
}
17751780

1776-
multiclass VOP3Dot_Real_dpp_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
1781+
multiclass VOP3Dot_Real_dpp_Base<GFXGen Gen, bits<10> op, string asmName, string opName = NAME> {
1782+
defvar ps = !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp");
17771783
def _e64_dpp#Gen.Suffix :
1778-
VOP3_DPP16_Gen<op, !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp"), Gen> {
1784+
VOP3_DPP16_Gen_t16<op, ps, Gen> {
1785+
let AsmString = asmName # ps.Pfl.AsmVOP3DPP16;
1786+
let DecoderNamespace = Gen.DecoderNamespace
1787+
# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
17791788
let Inst{11} = ?;
17801789
let Inst{12} = ?;
17811790
}
@@ -1797,12 +1806,14 @@ multiclass VOP3_Real_dpp8_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
17971806
}
17981807
}
17991808

1800-
multiclass VOP3Dot_Real_dpp8_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
1809+
multiclass VOP3Dot_Real_dpp8_Base<GFXGen Gen, bits<10> op, string asmName, string opName = NAME> {
18011810
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
1802-
def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8<op, ps> {
1811+
def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8_t16<op, ps> {
18031812
let Inst{11} = ?;
18041813
let Inst{12} = ?;
1805-
let DecoderNamespace = Gen.DecoderNamespace;
1814+
let AsmString = asmName # ps.Pfl.AsmVOP3DPP8;
1815+
let DecoderNamespace = Gen.DecoderNamespace
1816+
# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
18061817
let AssemblerPredicate = Gen.AssemblerPredicate;
18071818
}
18081819
}
@@ -1855,11 +1866,11 @@ multiclass VOP3_Realtriple<GFXGen Gen, bits<10> op, bit isSingle = 0,
18551866
VOP3_Real_dpp_Base<Gen, op, opName>,
18561867
VOP3_Real_dpp8_Base<Gen, op, opName>;
18571868

1858-
multiclass VOP3Dot_Realtriple<GFXGen Gen, bits<10> op, bit isSingle = 0,
1869+
multiclass VOP3Dot_Realtriple<GFXGen Gen, bits<10> op, string asmName, bit isSingle = 0,
18591870
string opName = NAME> :
1860-
VOP3Dot_Real_Base<Gen, op, opName, isSingle>,
1861-
VOP3Dot_Real_dpp_Base<Gen, op, opName>,
1862-
VOP3Dot_Real_dpp8_Base<Gen, op, opName>;
1871+
VOP3Dot_Real_Base<Gen, op, asmName, opName, isSingle>,
1872+
VOP3Dot_Real_dpp_Base<Gen, op, asmName, opName>,
1873+
VOP3Dot_Real_dpp8_Base<Gen, op, asmName, opName>;
18631874

18641875
multiclass VOP3Only_Realtriple<GFXGen Gen, bits<10> op> :
18651876
VOP3_Realtriple<Gen, op, 1>;
@@ -1957,6 +1968,12 @@ multiclass VOP3Only_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
19571968
VOP3Only_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
19581969
VOP3Only_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
19591970

1971+
multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op, string asmName, bit isSingle = 0,
1972+
string opName = NAME> :
1973+
VOP3Dot_Realtriple<GFX11Gen, op, asmName, isSingle, opName>,
1974+
VOP3Dot_Realtriple<GFX12Gen, op, asmName, isSingle, opName>;
1975+
1976+
19601977
//===----------------------------------------------------------------------===//
19611978

19621979
include "VOPCInstructions.td"

0 commit comments

Comments
 (0)