Skip to content

Commit fadbedf

Browse files
committed
[WIP][AMDGPU] Split isInlinableLiteral16 into three and call the specific version if possible
The current implementation of `isInlinableLiteral16` assumes that a 16-bit inlinable literal is either an i16 or an fp16. This is not always true because of bf16. However, we can't tell fp16 and bf16 apart just by looking at the value. This patch splits `isInlinableLiteral16` into three versions, for i16, fp16, and bf16 respectively, and calls the corresponding version wherever the operand type is known. This patch is based on #81282. Current status: only two uses of the original `isInlinableLiteral16` remain. For those, we need to add an extra argument indicating the type of the operand the immediate corresponds to, which will also require changing the function signatures of the two callers.
1 parent 3fc277f commit fadbedf

11 files changed

+230
-119
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

+43-11
Original file line numberDiff line numberDiff line change
@@ -2006,8 +2006,12 @@ static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
20062006
return isInlinableIntLiteral(Val);
20072007
}
20082008

2009-
// f16/v2f16 operands work correctly for all values.
2010-
return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
2009+
if (VT.getScalarType() == MVT::f16)
2010+
return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2011+
2012+
assert(VT.getScalarType() == MVT::bf16);
2013+
2014+
return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
20112015
}
20122016

20132017
bool AMDGPUOperand::isInlinableImm(MVT type) const {
@@ -2375,15 +2379,26 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
23752379
return;
23762380

23772381
case AMDGPU::OPERAND_REG_IMM_INT16:
2378-
case AMDGPU::OPERAND_REG_IMM_FP16:
2379-
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
23802382
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2381-
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
23822383
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2384+
if (isSafeTruncation(Val, 16) &&
2385+
AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2386+
Inst.addOperand(MCOperand::createImm(Val));
2387+
setImmKindConst();
2388+
return;
2389+
}
2390+
2391+
Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2392+
setImmKindLiteral();
2393+
return;
2394+
2395+
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2396+
case AMDGPU::OPERAND_REG_IMM_FP16:
2397+
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
23832398
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
23842399
if (isSafeTruncation(Val, 16) &&
2385-
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2386-
AsmParser->hasInv2PiInlineImm())) {
2400+
AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2401+
AsmParser->hasInv2PiInlineImm())) {
23872402
Inst.addOperand(MCOperand::createImm(Val));
23882403
setImmKindConst();
23892404
return;
@@ -2410,12 +2425,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
24102425
return;
24112426

24122427
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2428+
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
2429+
assert(isSafeTruncation(Val, 16));
2430+
assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2431+
Inst.addOperand(MCOperand::createImm(Val));
2432+
return;
2433+
}
24132434
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2414-
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
24152435
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
24162436
assert(isSafeTruncation(Val, 16));
2417-
assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2418-
AsmParser->hasInv2PiInlineImm()));
2437+
assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2438+
AsmParser->hasInv2PiInlineImm()));
24192439

24202440
Inst.addOperand(MCOperand::createImm(Val));
24212441
return;
@@ -3559,7 +3579,19 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
35593579
OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
35603580
return AMDGPU::isInlinableLiteralV2BF16(Val);
35613581

3562-
return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3582+
if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
3583+
OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
3584+
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 ||
3585+
OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED)
3586+
return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3587+
3588+
if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
3589+
OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16 ||
3590+
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_BF16 ||
3591+
OperandType == AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED)
3592+
return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3593+
3594+
llvm_unreachable("invalid operand type");
35633595
}
35643596
default:
35653597
llvm_unreachable("invalid operand size");

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

+8-10
Original file line numberDiff line numberDiff line change
@@ -460,10 +460,8 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
460460
}
461461
}
462462

463-
// This must accept a 32-bit immediate value to correctly handle packed 16-bit
464-
// operations.
465-
static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
466-
raw_ostream &O) {
463+
static bool printImmediateFP16(uint32_t Imm, const MCSubtargetInfo &STI,
464+
raw_ostream &O) {
467465
if (Imm == 0x3C00)
468466
O << "1.0";
469467
else if (Imm == 0xBC00)
@@ -529,17 +527,17 @@ void AMDGPUInstPrinter::printImmediateBF16(uint32_t Imm,
529527
O << formatHex(static_cast<uint64_t>(Imm));
530528
}
531529

532-
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
533-
const MCSubtargetInfo &STI,
534-
raw_ostream &O) {
530+
void AMDGPUInstPrinter::printImmediateF16(uint32_t Imm,
531+
const MCSubtargetInfo &STI,
532+
raw_ostream &O) {
535533
int16_t SImm = static_cast<int16_t>(Imm);
536534
if (isInlinableIntLiteral(SImm)) {
537535
O << SImm;
538536
return;
539537
}
540538

541539
uint16_t HImm = static_cast<uint16_t>(Imm);
542-
if (printImmediateFloat16(HImm, STI, O))
540+
if (printImmediateFP16(HImm, STI, O))
543541
return;
544542

545543
uint64_t Imm16 = static_cast<uint16_t>(Imm);
@@ -566,7 +564,7 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
566564
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
567565
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
568566
if (isUInt<16>(Imm) &&
569-
printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
567+
printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
570568
return;
571569
break;
572570
case AMDGPU::OPERAND_REG_IMM_V2BF16:
@@ -845,7 +843,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
845843
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
846844
case AMDGPU::OPERAND_REG_IMM_FP16:
847845
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
848-
printImmediate16(Op.getImm(), STI, O);
846+
printImmediateF16(Op.getImm(), STI, O);
849847
break;
850848
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
851849
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
8686
raw_ostream &O);
8787
void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
8888
raw_ostream &O);
89-
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
90-
raw_ostream &O);
9189
void printImmediateBF16(uint32_t Imm, const MCSubtargetInfo &STI,
9290
raw_ostream &O);
91+
void printImmediateF16(uint32_t Imm, const MCSubtargetInfo &STI,
92+
raw_ostream &O);
9393
void printImmediateV216(uint32_t Imm, uint8_t OpType,
9494
const MCSubtargetInfo &STI, raw_ostream &O);
9595
bool printImmediateFloat32(uint32_t Imm, const MCSubtargetInfo &STI,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

+22-6
Original file line numberDiff line numberDiff line change
@@ -15405,16 +15405,32 @@ bool SITargetLowering::checkAsmConstraintVal(SDValue Op, StringRef Constraint,
1540515405
llvm_unreachable("Invalid asm constraint");
1540615406
}
1540715407

15408-
bool SITargetLowering::checkAsmConstraintValA(SDValue Op,
15409-
uint64_t Val,
15408+
bool SITargetLowering::checkAsmConstraintValA(SDValue Op, uint64_t Val,
1541015409
unsigned MaxSize) const {
1541115410
unsigned Size = std::min<unsigned>(Op.getScalarValueSizeInBits(), MaxSize);
1541215411
bool HasInv2Pi = Subtarget->hasInv2PiInlineImm();
15413-
if ((Size == 16 && AMDGPU::isInlinableLiteral16(Val, HasInv2Pi)) ||
15414-
(Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
15415-
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi))) {
15412+
if (Size == 16) {
15413+
MVT VT = Op.getSimpleValueType();
15414+
switch (VT.SimpleTy) {
15415+
default:
15416+
return false;
15417+
case MVT::i16:
15418+
return AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
15419+
case MVT::f16:
15420+
return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
15421+
case MVT::bf16:
15422+
return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
15423+
case MVT::v2i16:
15424+
return AMDGPU::getInlineEncodingV2I16(Val).has_value();
15425+
case MVT::v2f16:
15426+
return AMDGPU::getInlineEncodingV2F16(Val).has_value();
15427+
case MVT::v2bf16:
15428+
return AMDGPU::getInlineEncodingV2BF16(Val).has_value();
15429+
}
15430+
}
15431+
if ((Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
15432+
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi)))
1541615433
return true;
15417-
}
1541815434
return false;
1541915435
}
1542015436

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

+22-3
Original file line numberDiff line numberDiff line change
@@ -4121,13 +4121,32 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
41214121
ST.hasInv2PiInlineImm());
41224122
case 16:
41234123
return ST.has16BitInsts() &&
4124-
AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
4125-
ST.hasInv2PiInlineImm());
4124+
AMDGPU::isInlinableLiteralI16(Imm.getSExtValue(),
4125+
ST.hasInv2PiInlineImm());
41264126
default:
41274127
llvm_unreachable("invalid bitwidth");
41284128
}
41294129
}
41304130

4131+
bool SIInstrInfo::isInlineConstant(const APFloat &Imm) const {
4132+
APInt IntImm = Imm.bitcastToAPInt();
4133+
int64_t IntImmVal = IntImm.getSExtValue();
4134+
bool HasInv2Pi = ST.hasInv2PiInlineImm();
4135+
switch (APFloat::SemanticsToEnum(Imm.getSemantics())) {
4136+
default:
4137+
llvm_unreachable("invalid fltSemantics");
4138+
case APFloatBase::S_IEEEsingle:
4139+
case APFloatBase::S_IEEEdouble:
4140+
return isInlineConstant(IntImm);
4141+
case APFloatBase::S_BFloat:
4142+
return ST.has16BitInsts() &&
4143+
AMDGPU::isInlinableLiteralBF16(IntImmVal, HasInv2Pi);
4144+
case APFloatBase::S_IEEEhalf:
4145+
return ST.has16BitInsts() &&
4146+
AMDGPU::isInlinableLiteralFP16(IntImmVal, HasInv2Pi);
4147+
}
4148+
}
4149+
41314150
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
41324151
uint8_t OperandType) const {
41334152
assert(!MO.isReg() && "isInlineConstant called on register operand!");
@@ -4200,7 +4219,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
42004219
// constants in these cases
42014220
int16_t Trunc = static_cast<int16_t>(Imm);
42024221
return ST.has16BitInsts() &&
4203-
AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
4222+
AMDGPU::isInlinableLiteralFP16(Trunc, ST.hasInv2PiInlineImm());
42044223
}
42054224

42064225
return false;

llvm/lib/Target/AMDGPU/SIInstrInfo.h

+1-3
Original file line numberDiff line numberDiff line change
@@ -984,9 +984,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
984984

985985
bool isInlineConstant(const APInt &Imm) const;
986986

987-
bool isInlineConstant(const APFloat &Imm) const {
988-
return isInlineConstant(Imm.bitcastToAPInt());
989-
}
987+
bool isInlineConstant(const APFloat &Imm) const;
990988

991989
// Returns true if this non-register operand definitely does not need to be
992990
// encoded as a 32-bit literal. Note that this function handles all kinds of

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

+8-2
Original file line numberDiff line numberDiff line change
@@ -2647,13 +2647,19 @@ bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
26472647
Val == 0x3E22; // 1.0 / (2.0 * pi)
26482648
}
26492649

2650-
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
2650+
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi) {
26512651
if (!HasInv2Pi)
26522652
return false;
2653-
26542653
if (isInlinableIntLiteral(Literal))
26552654
return true;
2655+
return Literal == static_cast<int16_t>(0x3e22f983);
2656+
}
26562657

2658+
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
2659+
if (!HasInv2Pi)
2660+
return false;
2661+
if (isInlinableIntLiteral(Literal))
2662+
return true;
26572663
uint16_t Val = static_cast<uint16_t>(Literal);
26582664
return Val == 0x3C00 || // 1.0
26592665
Val == 0xBC00 || // -1.0

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -1397,7 +1397,13 @@ LLVM_READNONE
13971397
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
13981398

13991399
LLVM_READNONE
1400-
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
1400+
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1401+
1402+
LLVM_READNONE
1403+
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1404+
1405+
LLVM_READNONE
1406+
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi);
14011407

14021408
LLVM_READNONE
14031409
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);

llvm/test/CodeGen/AMDGPU/immv216.ll

+21-21
Original file line numberDiff line numberDiff line change
@@ -577,40 +577,40 @@ define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x h
577577
}
578578

579579
; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16:
580-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x38003800
581-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
580+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3800
581+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
582582

583-
; GFX10: v_pk_mul_lo_u16 v0, 0x38003800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x38]
583+
; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x00]
584584
define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
585585
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>)
586586
ret <2 x i16> %y
587587
}
588588

589589
; GCN-LABEL: {{^}}mul_inline_imm_neg_0.5_v2i16:
590-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xb800b800
591-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
590+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xb800
591+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
592592

593-
; GFX10: v_pk_mul_lo_u16 v0, 0xb800b800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0x00,0xb8]
593+
; GFX10: v_pk_mul_lo_u16 v0, 0xffffb800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0xff,0xff]
594594
define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
595595
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>)
596596
ret <2 x i16> %y
597597
}
598598

599599
; GCN-LABEL: {{^}}mul_inline_imm_1.0_v2i16:
600-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x3c003c00
601-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
600+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3c00
601+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
602602

603-
; GFX10: v_pk_mul_lo_u16 v0, 0x3c003c00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x3c]
603+
; GFX10: v_pk_mul_lo_u16 v0, 0x3c00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x00]
604604
define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
605605
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>)
606606
ret <2 x i16> %y
607607
}
608608

609609
; GCN-LABEL: {{^}}mul_inline_imm_neg_1.0_v2i16:
610-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00bc00
611-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
610+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xbc00
611+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
612612

613-
; GFX10: v_pk_mul_lo_u16 v0, 0xbc00bc00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0x00,0xbc]
613+
; GFX10: v_pk_mul_lo_u16 v0, 0xffffbc00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0xff,0xff]
614614
define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) {
615615
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>)
616616
ret <2 x i16> %y
@@ -635,31 +635,31 @@ define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
635635
}
636636

637637
; GCN-LABEL: {{^}}mul_inline_imm_4.0_v2i16:
638-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x44004400
639-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
638+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x4400
639+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
640640

641-
; GFX10: v_pk_mul_lo_u16 v0, 0x44004400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x44]
641+
; GFX10: v_pk_mul_lo_u16 v0, 0x4400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x00]
642642
define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
643643
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
644644
ret <2 x i16> %y
645645

646646
}
647647

648648
; GCN-LABEL: {{^}}mul_inline_imm_neg_4.0_v2i16:
649-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc400c400
650-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
649+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xc400
650+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
651651

652-
; GFX10: v_pk_mul_lo_u16 v0, 0xc400c400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0x00,0xc4]
652+
; GFX10: v_pk_mul_lo_u16 v0, 0xffffc400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0xff,0xff]
653653
define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
654654
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>)
655655
ret <2 x i16> %y
656656
}
657657

658658
; GCN-LABEL: {{^}}mul_inline_imm_inv2pi_v2i16:
659-
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x31183118
660-
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
659+
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3118
660+
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
661661

662-
; GFX10: v_pk_mul_lo_u16 v0, 0x31183118, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x18,0x31]
662+
; GFX10: v_pk_mul_lo_u16 v0, 0x3118, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x00,0x00]
663663
define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
664664
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
665665
ret <2 x i16> %y

0 commit comments

Comments
 (0)