Skip to content

Commit 6b69584

Browse files
authored
[GlobalISel] Fall back for bf16 conversions. (#71470)
GlobalISel does not support bf16 conversions correctly because it does not yet have FP types. As a result, AMDGPU tests were silently miscompiling bf16 values as if they were fp16.
1 parent d08d9cf commit 6b69584

File tree

7 files changed

+430
-808
lines changed

7 files changed

+430
-808
lines changed

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -1485,6 +1485,9 @@ bool IRTranslator::translateBitCast(const User &U,
14851485

14861486
bool IRTranslator::translateCast(unsigned Opcode, const User &U,
14871487
MachineIRBuilder &MIRBuilder) {
1488+
if (U.getType()->getScalarType()->isBFloatTy() ||
1489+
U.getOperand(0)->getType()->getScalarType()->isBFloatTy())
1490+
return false;
14881491
Register Op = getOrCreateVReg(*U.getOperand(0));
14891492
Register Res = getOrCreateVReg(U);
14901493
MIRBuilder.buildInstr(Opcode, {Res}, {Op});

llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll

+120-266
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llvm.exp.ll

+72-147
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
22
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GCN-SDAG,VI,VI-SDAG %s
3-
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,VI,VI-GISEL %s
3+
; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,VI,VI-GISEL %s
44
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-SDAG,GFX900,GFX900-SDAG %s
5-
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,GFX900,GFX900-GISEL %s
5+
; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,GFX900,GFX900-GISEL %s
66
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-SDAG %s
7-
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-GISEL %s
7+
; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-GISEL %s
88

99
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s
1010
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s
@@ -5763,152 +5763,77 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
57635763
}
57645764

57655765
define float @v_exp_f32_from_fpext_bf16(bfloat %src) {
5766-
; VI-SDAG-LABEL: v_exp_f32_from_fpext_bf16:
5767-
; VI-SDAG: ; %bb.0:
5768-
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5769-
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5770-
; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
5771-
; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
5772-
; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
5773-
; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
5774-
; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
5775-
; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
5776-
; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5777-
; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
5778-
; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
5779-
; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
5780-
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5781-
; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
5782-
; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5783-
; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5784-
; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5785-
; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5786-
; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5787-
; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5788-
; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5789-
; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5790-
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
5791-
;
5792-
; VI-GISEL-LABEL: v_exp_f32_from_fpext_bf16:
5793-
; VI-GISEL: ; %bb.0:
5794-
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5795-
; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5796-
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5797-
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
5798-
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
5799-
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
5800-
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
5801-
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5802-
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
5803-
; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
5804-
; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
5805-
; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
5806-
; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
5807-
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
5808-
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5809-
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5810-
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5811-
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
5812-
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
5813-
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5814-
; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5815-
; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5816-
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5817-
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
5818-
;
5819-
; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_bf16:
5820-
; GFX900-SDAG: ; %bb.0:
5821-
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5822-
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5823-
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5824-
; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
5825-
; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
5826-
; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
5827-
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
5828-
; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
5829-
; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
5830-
; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
5831-
; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
5832-
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5833-
; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5834-
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5835-
; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
5836-
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5837-
; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5838-
; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5839-
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5840-
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5841-
;
5842-
; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_bf16:
5843-
; GFX900-GISEL: ; %bb.0:
5844-
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5845-
; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5846-
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5847-
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
5848-
; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
5849-
; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
5850-
; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v2
5851-
; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
5852-
; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
5853-
; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
5854-
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
5855-
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
5856-
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
5857-
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
5858-
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5859-
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
5860-
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5861-
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5862-
; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5863-
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5864-
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5766+
; VI-LABEL: v_exp_f32_from_fpext_bf16:
5767+
; VI: ; %bb.0:
5768+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5769+
; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
5770+
; VI-NEXT: v_sub_f32_e32 v4, v0, v1
5771+
; VI-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
5772+
; VI-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
5773+
; VI-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
5774+
; VI-NEXT: v_rndne_f32_e32 v3, v2
5775+
; VI-NEXT: v_add_f32_e32 v4, v4, v5
5776+
; VI-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
5777+
; VI-NEXT: v_sub_f32_e32 v2, v2, v3
5778+
; VI-NEXT: v_add_f32_e32 v1, v1, v4
5779+
; VI-NEXT: v_add_f32_e32 v1, v2, v1
5780+
; VI-NEXT: v_exp_f32_e32 v1, v1
5781+
; VI-NEXT: v_cvt_i32_f32_e32 v2, v3
5782+
; VI-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5783+
; VI-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5784+
; VI-NEXT: s_mov_b32 s4, 0x42b17218
5785+
; VI-NEXT: v_ldexp_f32 v1, v1, v2
5786+
; VI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5787+
; VI-NEXT: v_mov_b32_e32 v2, 0x7f800000
5788+
; VI-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5789+
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5790+
; VI-NEXT: s_setpc_b64 s[30:31]
58655791
;
5866-
; SI-SDAG-LABEL: v_exp_f32_from_fpext_bf16:
5867-
; SI-SDAG: ; %bb.0:
5868-
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5869-
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5870-
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5871-
; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
5872-
; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
5873-
; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
5874-
; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
5875-
; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
5876-
; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
5877-
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
5878-
; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
5879-
; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5880-
; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5881-
; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
5882-
; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
5883-
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5884-
; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
5885-
; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5886-
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5887-
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
5792+
; GFX900-LABEL: v_exp_f32_from_fpext_bf16:
5793+
; GFX900: ; %bb.0:
5794+
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5795+
; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5796+
; GFX900-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5797+
; GFX900-NEXT: v_rndne_f32_e32 v2, v1
5798+
; GFX900-NEXT: v_sub_f32_e32 v3, v1, v2
5799+
; GFX900-NEXT: v_fma_f32 v1, v0, s4, -v1
5800+
; GFX900-NEXT: s_mov_b32 s4, 0x32a5705f
5801+
; GFX900-NEXT: v_fma_f32 v1, v0, s4, v1
5802+
; GFX900-NEXT: v_add_f32_e32 v1, v3, v1
5803+
; GFX900-NEXT: v_exp_f32_e32 v1, v1
5804+
; GFX900-NEXT: v_cvt_i32_f32_e32 v2, v2
5805+
; GFX900-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5806+
; GFX900-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5807+
; GFX900-NEXT: s_mov_b32 s4, 0x42b17218
5808+
; GFX900-NEXT: v_ldexp_f32 v1, v1, v2
5809+
; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5810+
; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f800000
5811+
; GFX900-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5812+
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5813+
; GFX900-NEXT: s_setpc_b64 s[30:31]
58885814
;
5889-
; SI-GISEL-LABEL: v_exp_f32_from_fpext_bf16:
5890-
; SI-GISEL: ; %bb.0:
5891-
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5892-
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
5893-
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5894-
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
5895-
; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
5896-
; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
5897-
; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2
5898-
; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
5899-
; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
5900-
; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
5901-
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
5902-
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
5903-
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
5904-
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
5905-
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
5906-
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
5907-
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
5908-
; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
5909-
; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
5910-
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
5911-
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
5815+
; SI-LABEL: v_exp_f32_from_fpext_bf16:
5816+
; SI: ; %bb.0:
5817+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5818+
; SI-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
5819+
; SI-NEXT: s_mov_b32 s4, 0x3fb8aa3b
5820+
; SI-NEXT: v_rndne_f32_e32 v2, v1
5821+
; SI-NEXT: v_sub_f32_e32 v3, v1, v2
5822+
; SI-NEXT: v_fma_f32 v1, v0, s4, -v1
5823+
; SI-NEXT: s_mov_b32 s4, 0x32a5705f
5824+
; SI-NEXT: v_fma_f32 v1, v0, s4, v1
5825+
; SI-NEXT: v_add_f32_e32 v1, v3, v1
5826+
; SI-NEXT: v_exp_f32_e32 v1, v1
5827+
; SI-NEXT: v_cvt_i32_f32_e32 v2, v2
5828+
; SI-NEXT: s_mov_b32 s4, 0xc2ce8ed0
5829+
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
5830+
; SI-NEXT: s_mov_b32 s4, 0x42b17218
5831+
; SI-NEXT: v_ldexp_f32_e32 v1, v1, v2
5832+
; SI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
5833+
; SI-NEXT: v_mov_b32_e32 v2, 0x7f800000
5834+
; SI-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
5835+
; SI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
5836+
; SI-NEXT: s_setpc_b64 s[30:31]
59125837
;
59135838
; R600-LABEL: v_exp_f32_from_fpext_bf16:
59145839
; R600: ; %bb.0:

llvm/test/CodeGen/AMDGPU/llvm.exp2.ll

+16-23
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
22
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-SDAG,SI-SDAG %s
3-
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-GISEL,SI-GISEL %s
3+
; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-GISEL,SI-GISEL %s
44
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-SDAG,VI-SDAG %s
5-
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-GISEL,VI-GISEL %s
5+
; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-GISEL,VI-GISEL %s
66
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-SDAG,GFX900-SDAG %s
7-
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-GISEL,GFX900-GISEL %s
7+
; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-GISEL,GFX900-GISEL %s
88

99
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s
1010
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s
@@ -1992,26 +1992,19 @@ define float @v_exp2_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
19921992
}
19931993

19941994
define float @v_exp2_f32_from_fpext_bf16(bfloat %src) {
1995-
; GCN-SDAG-LABEL: v_exp2_f32_from_fpext_bf16:
1996-
; GCN-SDAG: ; %bb.0:
1997-
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1998-
; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1999-
; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
2000-
; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
2001-
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
2002-
; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
2003-
; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
2004-
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
2005-
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2006-
; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
2007-
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
2008-
;
2009-
; GCN-GISEL-LABEL: v_exp2_f32_from_fpext_bf16:
2010-
; GCN-GISEL: ; %bb.0:
2011-
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2012-
; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
2013-
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
2014-
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
1995+
; GCN-LABEL: v_exp2_f32_from_fpext_bf16:
1996+
; GCN: ; %bb.0:
1997+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1998+
; GCN-NEXT: s_mov_b32 s4, 0xc2fc0000
1999+
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
2000+
; GCN-NEXT: v_mov_b32_e32 v2, 0x42800000
2001+
; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
2002+
; GCN-NEXT: v_add_f32_e32 v0, v0, v2
2003+
; GCN-NEXT: v_exp_f32_e32 v0, v0
2004+
; GCN-NEXT: v_mov_b32_e32 v1, 0x1f800000
2005+
; GCN-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
2006+
; GCN-NEXT: v_mul_f32_e32 v0, v0, v1
2007+
; GCN-NEXT: s_setpc_b64 s[30:31]
20152008
;
20162009
; R600-LABEL: v_exp2_f32_from_fpext_bf16:
20172010
; R600: ; %bb.0:

0 commit comments

Comments
 (0)