Skip to content

Commit e10b12e

Browse files
authored
[AMDGPU][True16][MC] true16 for v_div_fixup_f16 (#119613)
Support true16 format for v_div_fixup_f16 in MC.
1 parent dc0ea0f commit e10b12e

14 files changed

+900
-306
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9292,6 +9292,7 @@ static bool isRenamedInGFX9(int Opcode) {
92929292
GENERATE_RENAMED_GFX9_CASES(AMDGPU::V_SUB_U32)
92939293
//
92949294
case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
9295+
case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
92959296
case AMDGPU::V_FMA_F16_gfx9_e64:
92969297
case AMDGPU::V_INTERP_P2_F16:
92979298
case AMDGPU::V_MAD_F16_e64:

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -339,8 +339,7 @@ let FPDPRounding = 1 in {
339339
} // End Predicates = [Has16BitInsts, isGFX8Only]
340340

341341
let SubtargetPredicate = isGFX9Plus in {
342-
defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
343-
VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup>;
342+
defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst_t16 <"v_div_fixup_f16_gfx9", VOP_F16_F16_F16_F16, AMDGPUdiv_fixup>;
344343
defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma>;
345344
} // End SubtargetPredicate = isGFX9Plus
346345
} // End FPDPRounding = 1
@@ -1720,7 +1719,7 @@ defm V_MED3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x24f, "v_
17201719
defm V_MED3_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x250, "v_med3_i16">;
17211720
defm V_MED3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x251, "v_med3_u16">;
17221721
defm V_MAD_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x253, "v_mad_i16", "V_MAD_I16_gfx9">;
1723-
defm V_DIV_FIXUP_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x254, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
1722+
defm V_DIV_FIXUP_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x254, "v_div_fixup_f16", "V_DIV_FIXUP_F16_gfx9">;
17241723
defm V_ADD3_U32 : VOP3_Realtriple_gfx11_gfx12<0x255>;
17251724
defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x256>;
17261725
defm V_AND_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x257>;

llvm/test/MC/AMDGPU/gfx11_asm_vop3.s

Lines changed: 56 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1607,53 +1607,77 @@ v_cvt_pk_norm_u16_f32 v5, -src_scc, |vcc_lo|
16071607
v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi|
16081608
// GFX11: v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
16091609

1610-
v_div_fixup_f16 v5, v1, v2, s3
1611-
// GFX11: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
1610+
v_div_fixup_f16 v5.l, v1.l, v2.l, s3
1611+
// GFX11: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
16121612

1613-
v_div_fixup_f16 v5, v255, s2, s105
1614-
// GFX11: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
1613+
v_div_fixup_f16 v5.l, v255.l, s2, s105
1614+
// GFX11: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
16151615

1616-
v_div_fixup_f16 v5, s1, v255, exec_hi
1617-
// GFX11: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
1616+
v_div_fixup_f16 v5.l, s1, v255.l, exec_hi
1617+
// GFX11: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
16181618

1619-
v_div_fixup_f16 v5, s105, s105, exec_lo
1620-
// GFX11: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
1619+
v_div_fixup_f16 v5.l, s105, s105, exec_lo
1620+
// GFX11: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
16211621

1622-
v_div_fixup_f16 v5, vcc_lo, ttmp15, v3
1623-
// GFX11: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
1622+
v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l
1623+
// GFX11: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
16241624

1625-
v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255
1626-
// GFX11: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
1625+
v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l
1626+
// GFX11: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
16271627

1628-
v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
1629-
// GFX11: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
1628+
v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15|
1629+
// GFX11: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
16301630

1631-
v_div_fixup_f16 v5, m0, 0.5, m0
1632-
// GFX11: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
1631+
v_div_fixup_f16 v5.l, m0, 0.5, m0
1632+
// GFX11: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
16331633

1634-
v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi
1635-
// GFX11: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
1634+
v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi
1635+
// GFX11: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
16361636

1637-
v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
1638-
// GFX11: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
1637+
v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
1638+
// GFX11: v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
16391639

1640-
v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
1641-
// GFX11: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
1640+
v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b|
1641+
// GFX11: v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
16421642

1643-
v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
1644-
// GFX11: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
1643+
v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
1644+
// GFX11: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
16451645

1646-
v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
1647-
// GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
1646+
v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
1647+
// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
16481648

1649-
v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
1650-
// GFX11: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
1649+
v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
1650+
// GFX11: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
16511651

1652-
v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
1653-
// GFX11: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
1652+
v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp
1653+
// GFX11: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
16541654

1655-
v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2
1656-
// GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
1655+
v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2
1656+
// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
1657+
1658+
v_div_fixup_f16 v5.l, v255.h, s2, s105
1659+
// GFX11: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01]
1660+
1661+
v_div_fixup_f16 v5.l, s1, v255.h, exec_hi
1662+
// GFX11: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01]
1663+
1664+
v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h
1665+
// GFX11: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
1666+
1667+
v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo|
1668+
// GFX11: v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
1669+
1670+
v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc|
1671+
// GFX11: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
1672+
1673+
v_div_fixup_f16 v5.l, 0.5, -m0, 0.5
1674+
// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
1675+
1676+
v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1
1677+
// GFX11: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x54,0xd6,0xfd,0xd4,0x04,0x23]
1678+
1679+
v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 mul:2
1680+
// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
16571681

16581682
v_div_fixup_f32 v5, v1, v2, s3
16591683
// GFX11: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00]

0 commit comments

Comments
 (0)