Skip to content

Commit c842cf0

Browse files
committed
[AMDGPU][MC][True16] VOP3dot instruction update for true16/fake16
1 parent 02668f6 commit c842cf0

14 files changed

+1284
-313
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,30 @@ class VOP3_DOT_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> {
931931
let HasOMod = 0;
932932
}
933933

934+
// Dot-instruction profile built on top of VOP3_Profile_True16.
// Clamp and output modifier are switched off, source modifiers are switched
// on for all three sources, and the source-modifier operand classes used by
// the VOP3 DPP form are replaced.
class VOP3_DOT_Profile_t16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile_True16<P, Features> {
935+
let HasClamp = 0;
936+
let HasOMod = 0;
937+
// Override modifiers for bf16(i16) (same as float modifiers).
938+
let HasSrc0Mods = 1;
939+
let HasSrc1Mods = 1;
940+
let HasSrc2Mods = 1;
941+
// Only src2 uses a T16 modifier class here, instantiated with IsFake16 = 0.
// NOTE(review): presumably src0/src1 are the packed 32-bit vector inputs and
// src2 is the 16-bit accumulator - confirm against the VOPProfile in use.
let Src0ModVOP3DPP = FPVRegInputMods;
942+
let Src1ModVOP3DPP = FP32VCSrcInputMods;
943+
let Src2ModVOP3DPP = FPT16VCSrcInputMods</*IsFake16*/0>;
944+
}
945+
946+
// Dot-instruction profile built on top of VOP3_Profile_Fake16.
// Clamp and output modifier are switched off, source modifiers are switched
// on for all three sources, and the base VOP3 asm string is recomputed.
class VOP3_DOT_Profile_fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile_Fake16<P, Features> {
947+
let HasClamp = 0;
948+
let HasOMod = 0;
949+
// Override modifiers for bf16(i16) (same as float modifiers).
950+
let HasSrc0Mods = 1;
951+
let HasSrc1Mods = 1;
952+
let HasSrc2Mods = 1;
953+
// The three HasSrcNMods arguments are passed as literal 1 rather than by
// field name.
// NOTE(review): presumably this keeps the asm string in step with the
// overrides above - confirm how the parent class computes AsmVOP3Base.
let AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
954+
HasOpSel, HasOMod, IsVOP3P, HasModifiers, 1/*HasSrc0Mods*/, 1/*HasSrc1Mods*/,
955+
1/*HasSrc2Mods*/, DstVT>.ret;
956+
}
957+
934958
let SubtargetPredicate = isGFX11Plus in {
935959
defm V_MAXMIN_F32 : VOP3Inst<"v_maxmin_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
936960
defm V_MINMAX_F32 : VOP3Inst<"v_minmax_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
@@ -951,9 +975,15 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
951975
defm V_MINIMUMMAXIMUM_F16 : VOP3Inst<"v_minimummaximum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
952976
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
953977

954-
let OtherPredicates = [HasDot9Insts], IsDOT=1 in {
955-
defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>, int_amdgcn_fdot2_f16_f16>;
956-
defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_BF16_V2BF16_V2BF16_BF16>, int_amdgcn_fdot2_bf16_bf16>;
978+
// Pseudo definitions for the two 16-bit dot2 instructions, gated on
// HasDot9Insts and tagged IsDOT. Each defm passes three profiles to
// VOP3Inst_t16_with_profiles (the base dot profile, the _t16 profile and the
// _fake16 profile) followed by the intrinsic it is associated with.
let SubtargetPredicate = HasDot9Insts, IsDOT=1 in {
979+
defm V_DOT2_F16_F16 : VOP3Inst_t16_with_profiles<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>,
980+
VOP3_DOT_Profile_t16<VOP_F16_V2F16_V2F16_F16>,
981+
VOP3_DOT_Profile_fake16<VOP_F16_V2F16_V2F16_F16>,
982+
int_amdgcn_fdot2_f16_f16>;
983+
defm V_DOT2_BF16_BF16 : VOP3Inst_t16_with_profiles<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_BF16_V2BF16_V2BF16_BF16>,
984+
VOP3_DOT_Profile_t16<VOP_BF16_V2BF16_V2BF16_BF16>,
985+
VOP3_DOT_Profile_fake16<VOP_BF16_V2BF16_V2BF16_BF16>,
986+
int_amdgcn_fdot2_bf16_bf16>;
957987
}
958988

959989
class VOP_Pseudo_Scalar<RegisterClass Dst, RegisterOperand SrcOp,
@@ -1112,8 +1142,10 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
11121142
VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
11131143
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
11141144

1115-
multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
1116-
VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
1145+
// Instantiates VOP3Dot_Realtriple_gfx11_gfx12 twice for one opcode: once
// under the "_t16" suffix and once under "_fake16". Both share `op` and
// `asmName`; the suffix is appended to `opName` to select the pseudo, and
// isSingle is passed as 0 in both cases.
multiclass VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME> {
1146+
defm _t16: VOP3Dot_Realtriple_gfx11_gfx12<op, asmName, 0, opName#"_t16">;
1147+
defm _fake16: VOP3Dot_Realtriple_gfx11_gfx12<op, asmName, 0, opName#"_fake16">;
1148+
}
11171149

11181150
multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
11191151
string pseudo_mnemonic = "", bit isSingle = 0> :
@@ -1205,8 +1237,8 @@ defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11_gfx12<0x262>;
12051237
defm V_MINMAX_U32 : VOP3_Realtriple_gfx11_gfx12<0x263>;
12061238
defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11_gfx12<0x264>;
12071239
defm V_MINMAX_I32 : VOP3_Realtriple_gfx11_gfx12<0x265>;
1208-
defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_gfx11_gfx12<0x266>;
1209-
defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_gfx11_gfx12<0x267>;
1240+
defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x266, "v_dot2_f16_f16">;
1241+
defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x267, "v_dot2_bf16_bf16">;
12101242
defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
12111243
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
12121244
defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -347,9 +347,12 @@ class VOP3FP8OpSel_src_bytesel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3e_gf
347347
let Inst{14-13} = byte_sel; // op_sel2/3
348348
}
349349

350-
class VOP3DotOpSel_gfx11_gfx12<bits<10> op, VOPProfile p> : VOP3OpSel_gfx11_gfx12<op, p>{
350+
// Encoding class for VOP3 dot instructions, derived from
// VOP3e_t16_gfx11_gfx12. Bits 11 and 12 are left unset ('?'). Bit 13 is
// taken from src2_modifiers{2} when the profile has src2 modifiers, and
// bit 14 from src0_modifiers{3} when the profile has both a destination
// and src0 modifiers; otherwise each of those bits is 0.
// NOTE(review): bits 13/14 look like the op_sel bits for src2 and dst
// (high-half select) - confirm against the ISA encoding.
class VOP3DotOpSel_gfx11_gfx12<bits<10> op, VOPProfile p> :
351+
VOP3e_t16_gfx11_gfx12<op, p>{
351352
let Inst{11} = ?;
352353
let Inst{12} = ?;
354+
let Inst{13} = !if(p.HasSrc2Mods, src2_modifiers{2}, 0);
355+
let Inst{14} = !if(!and(p.HasDst, p.HasSrc0Mods), src0_modifiers{3}, 0);
353356
}
354357

355358
// NB: For V_INTERP* opcodes, src0 is encoded as src1 and vice versa
@@ -1611,10 +1614,12 @@ multiclass VOP3_Real_Base<GFXGen Gen, bits<10> op, string opName = NAME,
16111614
}
16121615
}
16131616

1614-
multiclass VOP3Dot_Real_Base<GFXGen Gen, bits<10> op, string opName = NAME,
1617+
multiclass VOP3Dot_Real_Base<GFXGen Gen, bits<10> op, string asmName, string opName = NAME,
16151618
bit isSingle = 0> {
16161619
defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
1617-
let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
1620+
let AsmString = asmName # ps.AsmOperands,
1621+
DecoderNamespace = Gen.DecoderNamespace # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"),
1622+
IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
16181623
def _e64#Gen.Suffix :
16191624
VOP3_Real_Gen<ps, Gen>,
16201625
VOP3DotOpSel_gfx11_gfx12<op, ps.Pfl>;
@@ -1678,9 +1683,13 @@ multiclass VOP3_Real_dpp_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
16781683
VOP3_DPP16_Gen<op, ps, Gen>;
16791684
}
16801685

1681-
multiclass VOP3Dot_Real_dpp_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
1686+
multiclass VOP3Dot_Real_dpp_Base<GFXGen Gen, bits<10> op, string asmName, string opName = NAME> {
1687+
defvar ps = !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp");
16821688
def _e64_dpp#Gen.Suffix :
1683-
VOP3_DPP16_Gen<op, !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp"), Gen> {
1689+
VOP3_DPP16_Gen_t16<op, ps, Gen> {
1690+
let AsmString = asmName # ps.Pfl.AsmVOP3DPP16;
1691+
let DecoderNamespace = Gen.DecoderNamespace
1692+
# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
16841693
let Inst{11} = ?;
16851694
let Inst{12} = ?;
16861695
}
@@ -1702,12 +1711,14 @@ multiclass VOP3_Real_dpp8_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
17021711
}
17031712
}
17041713

1705-
multiclass VOP3Dot_Real_dpp8_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
1714+
// Defines the _e64_dpp8 real instruction for a VOP3 dot opcode in
// generation `Gen`. The pseudo is looked up as opName#"_e64"; the asm string
// is `asmName` followed by the profile's AsmVOP3DPP8 operand string. The
// decoder namespace is the generation's namespace, with "_FAKE16" appended
// unless the profile sets IsRealTrue16.
multiclass VOP3Dot_Real_dpp8_Base<GFXGen Gen, bits<10> op, string asmName, string opName = NAME> {
17061715
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
1707-
def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8<op, ps> {
1716+
def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8_t16<op, ps> {
17081717
// Bits 11 and 12 are left unset ('?') for the dot encodings.
let Inst{11} = ?;
17091718
let Inst{12} = ?;
1710-
let DecoderNamespace = Gen.DecoderNamespace;
1719+
let AsmString = asmName # ps.Pfl.AsmVOP3DPP8;
1720+
let DecoderNamespace = Gen.DecoderNamespace
1721+
# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
17111722
let AssemblerPredicate = Gen.AssemblerPredicate;
17121723
}
17131724
}
@@ -1760,11 +1771,11 @@ multiclass VOP3_Realtriple<GFXGen Gen, bits<10> op, bit isSingle = 0,
17601771
VOP3_Real_dpp_Base<Gen, op, opName>,
17611772
VOP3_Real_dpp8_Base<Gen, op, opName>;
17621773

1763-
multiclass VOP3Dot_Realtriple<GFXGen Gen, bits<10> op, bit isSingle = 0,
1774+
// Combines the three dot real-instruction multiclasses for one generation:
// VOP3Dot_Real_Base, VOP3Dot_Real_dpp_Base and VOP3Dot_Real_dpp8_Base.
// `asmName` and `opName` are forwarded to all three; `isSingle` goes only to
// the base form.
multiclass VOP3Dot_Realtriple<GFXGen Gen, bits<10> op, string asmName, bit isSingle = 0,
17641775
string opName = NAME> :
1765-
VOP3Dot_Real_Base<Gen, op, opName, isSingle>,
1766-
VOP3Dot_Real_dpp_Base<Gen, op, opName>,
1767-
VOP3Dot_Real_dpp8_Base<Gen, op, opName>;
1776+
VOP3Dot_Real_Base<Gen, op, asmName, opName, isSingle>,
1777+
VOP3Dot_Real_dpp_Base<Gen, op, asmName, opName>,
1778+
VOP3Dot_Real_dpp8_Base<Gen, op, asmName, opName>;
17681779

17691780
multiclass VOP3Only_Realtriple<GFXGen Gen, bits<10> op> :
17701781
VOP3_Realtriple<Gen, op, 1>;
@@ -1862,6 +1873,12 @@ multiclass VOP3Only_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
18621873
VOP3Only_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
18631874
VOP3Only_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
18641875

1876+
// Instantiates VOP3Dot_Realtriple for both GFX11Gen and GFX12Gen with the
// same opcode, asm name, isSingle flag and pseudo name.
multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op, string asmName, bit isSingle = 0,
1877+
string opName = NAME> :
1878+
VOP3Dot_Realtriple<GFX11Gen, op, asmName, isSingle, opName>,
1879+
VOP3Dot_Realtriple<GFX12Gen, op, asmName, isSingle, opName>;
1880+
1881+
18651882
//===----------------------------------------------------------------------===//
18661883

18671884
include "VOPCInstructions.td"

llvm/test/MC/AMDGPU/gfx11_asm_vop3.s

Lines changed: 52 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2119,104 +2119,116 @@ v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2
21192119
// W64: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
21202120
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
21212121

2122-
v_dot2_bf16_bf16 v5, v1, v2, s3
2122+
v_dot2_bf16_bf16 v5.l, v1, v2, s3
21232123
// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00]
21242124

2125-
v_dot2_bf16_bf16 v5, v255, v255, s105
2125+
v_dot2_bf16_bf16 v5.l, v255, v255, s105
21262126
// GFX11: encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01]
21272127

2128-
v_dot2_bf16_bf16 v5, s1, s2, v3
2128+
v_dot2_bf16_bf16 v5.l, s1, s2, v3.l
21292129
// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04]
21302130

2131-
v_dot2_bf16_bf16 v5, s105, s105, m0
2131+
v_dot2_bf16_bf16 v5.l, s105, s105, m0
21322132
// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01]
21332133

2134-
v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255
2135-
// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
2134+
v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h
2135+
// GFX11: encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
21362136

2137-
v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi
2137+
v_dot2_bf16_bf16 v5.l, vcc_hi, 0xfe0b, vcc_hi
21382138
// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
21392139

2140-
v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15
2140+
v_dot2_bf16_bf16 v5.l, ttmp15, src_scc, ttmp15
21412141
// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01]
21422142

2143-
v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo
2143+
v_dot2_bf16_bf16 v5.l, |m0|, -1, -vcc_lo
21442144
// GFX11: encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81]
21452145

2146-
v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b|
2146+
v_dot2_bf16_bf16 v5.l, -|exec_lo|, null, -|0xfe0b|
21472147
// GFX11: encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00]
21482148

2149-
v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo|
2149+
v_dot2_bf16_bf16 v5.l, -|exec_hi|, -|exec_lo|, -|exec_lo|
21502150
// GFX11: encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1]
21512151

2152-
v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc|
2152+
v_dot2_bf16_bf16 v5.l, null, -exec_hi, |src_scc|
21532153
// GFX11: encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43]
21542154

2155-
v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| op_sel:[0,0,0,0]
2155+
v_dot2_bf16_bf16 v5.l, -1, -|m0|, -|exec_hi|
21562156
// GFX11: encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1]
21572157

2158-
v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
2159-
// GFX11: encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23]
2158+
v_dot2_bf16_bf16 v5.l, -src_scc, |vcc_lo|, -1
2159+
// GFX11: encoding: [0x05,0x02,0x67,0xd6,0xfd,0xd4,0x04,0x23]
21602160

2161-
v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
2161+
v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null
21622162
// GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
21632163

2164-
v_dot2_bf16_bf16 v2, v0, 0x20004000, v2
2165-
// GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
2164+
v_dot2_bf16_bf16 v2.l, v0, 0x20004000, v2.l
2165+
// GFX11: encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
21662166

2167-
v_dot2_bf16_bf16 v2, 0x20004000, v0, v2
2168-
// GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
2167+
v_dot2_bf16_bf16 v2.l, 0x20004000, v0, v2.l
2168+
// GFX11: encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
21692169

2170-
v_dot2_f16_f16 v5, v1, v2, s3
2170+
v_dot2_bf16_bf16 v5.l, vcc_lo, ttmp15, v255.h
2171+
// GFX11: encoding: [0x05,0x20,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
2172+
2173+
v_dot2_bf16_bf16 v255.h, -|0xfe0b|, -|vcc_hi|, null
2174+
// GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
2175+
2176+
v_dot2_f16_f16 v5.l, v1, v2, s3
21712177
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
21722178

2173-
v_dot2_f16_f16 v5, v255, s2, s105
2179+
v_dot2_f16_f16 v5.l, v255, s2, s105
21742180
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01]
21752181

2176-
v_dot2_f16_f16 v5, s1, v255, exec_hi
2182+
v_dot2_f16_f16 v5.l, s1, v255, exec_hi
21772183
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01]
21782184

2179-
v_dot2_f16_f16 v5, s105, s105, exec_lo
2185+
v_dot2_f16_f16 v5.l, s105, s105, exec_lo
21802186
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01]
21812187

2182-
v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3
2188+
v_dot2_f16_f16 v5.l, vcc_lo, ttmp15, v3.l
21832189
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04]
21842190

2185-
v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255
2186-
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
2191+
v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h
2192+
// GFX11: encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
21872193

2188-
v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
2194+
v_dot2_f16_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15|
21892195
// GFX11: encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1]
21902196

2191-
v_dot2_f16_f16 v5, m0, 0.5, m0
2197+
v_dot2_f16_f16 v5.l, m0, 0.5, m0
21922198
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01]
21932199

2194-
v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi
2200+
v_dot2_f16_f16 v5.l, |exec_lo|, -1, vcc_hi
21952201
// GFX11: encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01]
21962202

2197-
v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo|
2203+
v_dot2_f16_f16 v5.l, -|exec_hi|, null, -|vcc_lo|
21982204
// GFX11: encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1]
21992205

2200-
v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b|
2206+
v_dot2_f16_f16 v5.l, null, exec_lo, -|0xfe0b|
22012207
// GFX11: encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
22022208

2203-
v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc|
2209+
v_dot2_f16_f16 v5.l, -1, -|exec_hi|, -|src_scc|
22042210
// GFX11: encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3]
22052211

2206-
v_dot2_f16_f16 v5, 0.5, -m0, 0.5 op_sel:[0,0,0,0]
2212+
v_dot2_f16_f16 v5.l, 0.5, -m0, 0.5
22072213
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43]
22082214

2209-
v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
2215+
v_dot2_f16_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
22102216
// GFX11: encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23]
22112217

2212-
v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
2218+
v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null
22132219
// GFX11: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
22142220

2215-
v_dot2_f16_f16 v2, v0, 0x20004000, v2
2216-
// GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
2221+
v_dot2_f16_f16 v2.l, v0, 0x20004000, v2.l
2222+
// GFX11: encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
22172223

2218-
v_dot2_f16_f16 v2, 0x20004000, v0, v2
2219-
// GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
2224+
v_dot2_f16_f16 v2.l, 0x20004000, v0, v2.l
2225+
// GFX11: encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
2226+
2227+
v_dot2_f16_f16 v5.l, vcc_hi, 0xfe0b, v255.h
2228+
// GFX11: encoding: [0x05,0x20,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
2229+
2230+
v_dot2_f16_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null
2231+
// GFX11: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
22202232

22212233
v_fma_dx9_zero_f32 v5, v1, v2, s3
22222234
// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]

0 commit comments

Comments
 (0)