Skip to content

Commit ea6c36f

Browse files
committed
True16 for v_div_fixup_f16 in MC
1 parent a1d71c3 commit ea6c36f

14 files changed

+900
-306
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9284,6 +9284,7 @@ static bool isRenamedInGFX9(int Opcode) {
92849284
GENERATE_RENAMED_GFX9_CASES(AMDGPU::V_SUB_U32)
92859285
//
92869286
case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
9287+
case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
92879288
case AMDGPU::V_FMA_F16_gfx9_e64:
92889289
case AMDGPU::V_INTERP_P2_F16:
92899290
case AMDGPU::V_MAD_F16_e64:

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -339,8 +339,7 @@ let FPDPRounding = 1 in {
339339
} // End Predicates = [Has16BitInsts, isGFX8Only]
340340

341341
let SubtargetPredicate = isGFX9Plus in {
342-
defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
343-
VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup>;
342+
defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst_t16 <"v_div_fixup_f16_gfx9", VOP_F16_F16_F16_F16, AMDGPUdiv_fixup>;
344343
defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma>;
345344
} // End SubtargetPredicate = isGFX9Plus
346345
} // End FPDPRounding = 1
@@ -1717,7 +1716,7 @@ defm V_MED3_F16 : VOP3_Realtriple_gfx11<0x24f>;
17171716
defm V_MED3_I16 : VOP3_Realtriple_gfx11_gfx12<0x250>;
17181717
defm V_MED3_U16 : VOP3_Realtriple_gfx11_gfx12<0x251>;
17191718
defm V_MAD_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x253, "V_MAD_I16_gfx9", "v_mad_i16">;
1720-
defm V_DIV_FIXUP_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x254, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
1719+
defm V_DIV_FIXUP_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x254, "v_div_fixup_f16", "V_DIV_FIXUP_F16_gfx9">;
17211720
defm V_ADD3_U32 : VOP3_Realtriple_gfx11_gfx12<0x255>;
17221721
defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x256>;
17231722
defm V_AND_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x257>;

llvm/test/MC/AMDGPU/gfx11_asm_vop3.s

Lines changed: 56 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1574,53 +1574,77 @@ v_cvt_pk_norm_u16_f32 v5, -src_scc, |vcc_lo|
15741574
v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi|
15751575
// GFX11: v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
15761576

1577-
v_div_fixup_f16 v5, v1, v2, s3
1578-
// GFX11: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
1577+
v_div_fixup_f16 v5.l, v1.l, v2.l, s3
1578+
// GFX11: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
15791579

1580-
v_div_fixup_f16 v5, v255, s2, s105
1581-
// GFX11: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
1580+
v_div_fixup_f16 v5.l, v255.l, s2, s105
1581+
// GFX11: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
15821582

1583-
v_div_fixup_f16 v5, s1, v255, exec_hi
1584-
// GFX11: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
1583+
v_div_fixup_f16 v5.l, s1, v255.l, exec_hi
1584+
// GFX11: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
15851585

1586-
v_div_fixup_f16 v5, s105, s105, exec_lo
1587-
// GFX11: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
1586+
v_div_fixup_f16 v5.l, s105, s105, exec_lo
1587+
// GFX11: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
15881588

1589-
v_div_fixup_f16 v5, vcc_lo, ttmp15, v3
1590-
// GFX11: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
1589+
v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l
1590+
// GFX11: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
15911591

1592-
v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255
1593-
// GFX11: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
1592+
v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l
1593+
// GFX11: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
15941594

1595-
v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
1596-
// GFX11: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
1595+
v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15|
1596+
// GFX11: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
15971597

1598-
v_div_fixup_f16 v5, m0, 0.5, m0
1599-
// GFX11: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
1598+
v_div_fixup_f16 v5.l, m0, 0.5, m0
1599+
// GFX11: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
16001600

1601-
v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi
1602-
// GFX11: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
1601+
v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi
1602+
// GFX11: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
16031603

1604-
v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
1605-
// GFX11: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
1604+
v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
1605+
// GFX11: v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
16061606

1607-
v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
1608-
// GFX11: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
1607+
v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b|
1608+
// GFX11: v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
16091609

1610-
v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
1611-
// GFX11: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
1610+
v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
1611+
// GFX11: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
16121612

1613-
v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
1614-
// GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
1613+
v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
1614+
// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
16151615

1616-
v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
1617-
// GFX11: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
1616+
v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
1617+
// GFX11: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
16181618

1619-
v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
1620-
// GFX11: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
1619+
v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp
1620+
// GFX11: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
16211621

1622-
v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2
1623-
// GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
1622+
v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2
1623+
// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
1624+
1625+
v_div_fixup_f16 v5.l, v255.h, s2, s105
1626+
// GFX11: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01]
1627+
1628+
v_div_fixup_f16 v5.l, s1, v255.h, exec_hi
1629+
// GFX11: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01]
1630+
1631+
v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h
1632+
// GFX11: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
1633+
1634+
v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo|
1635+
// GFX11: v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
1636+
1637+
v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc|
1638+
// GFX11: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
1639+
1640+
v_div_fixup_f16 v5.l, 0.5, -m0, 0.5
1641+
// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
1642+
1643+
v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1
1644+
// GFX11: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x54,0xd6,0xfd,0xd4,0x04,0x23]
1645+
1646+
v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 mul:2
1647+
// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
16241648

16251649
v_div_fixup_f32 v5, v1, v2, s3
16261650
// GFX11: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00]

0 commit comments

Comments
 (0)