From 6e0ed50893c698cd94db1cad0a5274495007630c Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Mon, 6 Nov 2023 16:57:58 -0800 Subject: [PATCH] [GlobalISel] Fall back for bf16 conversions. We don't support these correctly since we don't yet have FP types. AMDGPU tests were silently miscompiling bf16 as if they were fp16. --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 3 + .../test/CodeGen/AMDGPU/fmed3-cast-combine.ll | 386 ++++++------------ llvm/test/CodeGen/AMDGPU/llvm.exp.ll | 219 ++++------ llvm/test/CodeGen/AMDGPU/llvm.exp2.ll | 39 +- llvm/test/CodeGen/AMDGPU/llvm.log.ll | 259 +++++------- llvm/test/CodeGen/AMDGPU/llvm.log10.ll | 259 +++++------- llvm/test/CodeGen/AMDGPU/llvm.log2.ll | 73 ++-- 7 files changed, 430 insertions(+), 808 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index d8f9e30b25997..3098c8ea468a9 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1484,6 +1484,9 @@ bool IRTranslator::translateBitCast(const User &U, bool IRTranslator::translateCast(unsigned Opcode, const User &U, MachineIRBuilder &MIRBuilder) { + if (U.getType()->getScalarType()->isBFloatTy() || + U.getOperand(0)->getType()->getScalarType()->isBFloatTy()) + return false; Register Op = getOrCreateVReg(*U.getOperand(0)); Register Res = getOrCreateVReg(U); MIRBuilder.buildInstr(Opcode, {Res}, {Op}); diff --git a/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll b/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll index e3457421a4903..e9bf515daabca 100644 --- a/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll @@ -1,15 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test no legal f16. Should just keep the cast to f32 and ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-SDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-GISEL %s ; Test legal f16, no f16 fmed3. Should expand to min/max sequence ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s ; Legal f16 med3. InstCombine ought to shrink the f32 op to f16 so the codegen doesn't really matter for this. ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s declare float @llvm.amdgcn.fmed3.f32(float, float, float) #0 @@ -773,61 +773,32 @@ define half @fmed3_fneg_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) # ; -------------------------------------------------------------------------------- define bfloat @fmed3_f32_fpext_f16_fptrunc_bf16(half %arg0, half %arg1, half %arg2) #1 { -; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16: -; GFX7-SDAG: ; %bb.0: -; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16: -; GFX7-GISEL: ; %bb.0: -; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16: -; GFX8-SDAG: ; %bb.0: -; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX8-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16: -; GFX8-GISEL: ; %bb.0: -; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1 -; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2 -; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX7-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_med3_f32 v0, v0, v1, v2 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX8-NEXT: v_med3_f32 v0, v0, v1, v2 +; GFX8-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] %arg0.ext = fpext half %arg0 to float %arg1.ext = fpext half %arg1 to float %arg2.ext = fpext half %arg2 to float @@ -1039,56 +1010,27 @@ define half @fmed3_f32_fpext_f16_multi_use_2(half %arg0, half %arg1, half %arg2, } define half @fmed3_f32_fpext_bf16(bfloat %arg0, bfloat %arg1, bfloat %arg2) #1 { -; GFX7-SDAG-LABEL: fmed3_f32_fpext_bf16: -; GFX7-SDAG: ; %bb.0: -; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX7-GISEL-LABEL: fmed3_f32_fpext_bf16: -; GFX7-GISEL: ; %bb.0: -; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-SDAG-LABEL: fmed3_f32_fpext_bf16: -; GFX8-SDAG: ; %bb.0: -; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-GISEL-LABEL: fmed3_f32_fpext_bf16: -; GFX8-GISEL: ; %bb.0: -; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1 -; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2 -; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-SDAG-LABEL: fmed3_f32_fpext_bf16: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX7-LABEL: fmed3_f32_fpext_bf16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_med3_f32 v0, v0, v1, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: fmed3_f32_fpext_bf16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_med3_f32 v0, v0, v1, v2 +; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: fmed3_f32_fpext_bf16: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: fmed3_f32_fpext_bf16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2 +; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] %arg0.ext = fpext bfloat %arg0 to float %arg1.ext = fpext bfloat %arg1 to float %arg2.ext = fpext bfloat %arg2 to float @@ -1098,60 +1040,31 @@ define half @fmed3_f32_fpext_bf16(bfloat %arg0, bfloat %arg1, bfloat %arg2) #1 { } define half @fmed3_f32_fpext_f16_bf16_0(bfloat %arg0, half %arg1, half %arg2) #1 { -; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_0: -; GFX7-SDAG: ; %bb.0: -; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_0: -; GFX7-GISEL: ; %bb.0: -; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_0: -; GFX8-SDAG: ; %bb.0: -; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_0: -; GFX8-GISEL: ; %bb.0: -; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1 -; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2 -; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_0: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX7-LABEL: fmed3_f32_fpext_f16_bf16_0: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_med3_f32 v0, v0, v1, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: fmed3_f32_fpext_f16_bf16_0: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX8-NEXT: v_med3_f32 v0, v0, v1, v2 +; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_0: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: fmed3_f32_fpext_f16_bf16_0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2 +; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] %arg0.ext = fpext bfloat %arg0 to float %arg1.ext = fpext half %arg1 to float %arg2.ext = fpext half %arg2 to float @@ -1161,60 +1074,31 @@ define half @fmed3_f32_fpext_f16_bf16_0(bfloat %arg0, half %arg1, half %arg2) #1 } define half @fmed3_f32_fpext_f16_bf16_1(half %arg0, bfloat %arg1, half %arg2) #1 { -; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_1: -; GFX7-SDAG: ; %bb.0: -; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_1: -; GFX7-GISEL: ; %bb.0: -; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_1: -; GFX8-SDAG: ; %bb.0: -; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_1: -; GFX8-GISEL: ; %bb.0: -; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1 -; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2 -; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_1: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX7-LABEL: fmed3_f32_fpext_f16_bf16_1: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_med3_f32 v0, v0, v1, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: fmed3_f32_fpext_f16_bf16_1: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX8-NEXT: v_med3_f32 v0, v0, v1, v2 +; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_1: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: fmed3_f32_fpext_f16_bf16_1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2 +; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] %arg0.ext = fpext half %arg0 to float %arg1.ext = fpext bfloat %arg1 to float %arg2.ext = fpext half %arg2 to float @@ -1224,60 +1108,31 @@ define half @fmed3_f32_fpext_f16_bf16_1(half %arg0, bfloat %arg1, half %arg2) #1 } define half @fmed3_f32_fpext_f16_bf16_2(half %arg0, half %arg1, bfloat %arg2) #1 { -; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_2: -; GFX7-SDAG: ; %bb.0: -; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_2: -; GFX7-GISEL: ; %bb.0: -; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_2: -; GFX8-SDAG: ; %bb.0: -; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_2: -; GFX8-GISEL: ; %bb.0: -; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1 -; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2 -; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_2: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX7-LABEL: fmed3_f32_fpext_f16_bf16_2: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_med3_f32 v0, v0, v1, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: fmed3_f32_fpext_f16_bf16_2: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX8-NEXT: v_med3_f32 v0, v0, v1, v2 +; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_2: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2 -; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: fmed3_f32_fpext_f16_bf16_2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2 +; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] %arg0.ext = fpext half %arg0 to float %arg1.ext = fpext half %arg1 to float %arg2.ext = fpext bfloat %arg2 to float @@ -1488,4 +1343,3 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GCN: {{.*}} -; GFX7: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll index 16ee31a875b39..b8df8b3d05727 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GCN-SDAG,VI,VI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,VI,VI-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,VI,VI-GISEL %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-SDAG,GFX900,GFX900-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,GFX900,GFX900-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,GFX900,GFX900-GISEL %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-GISEL %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s @@ -5763,152 +5763,77 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { } define float @v_exp_f32_from_fpext_bf16(bfloat %src) { -; VI-SDAG-LABEL: v_exp_f32_from_fpext_bf16: -; VI-SDAG: ; %bb.0: -; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 -; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 -; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 -; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; VI-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; VI-GISEL-LABEL: v_exp_f32_from_fpext_bf16: -; VI-GISEL: ; %bb.0: -; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 -; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 -; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc -; VI-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_bf16: -; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f -; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_bf16: -; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2 -; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v2 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4 -; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 -; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 -; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc -; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; VI-LABEL: v_exp_f32_from_fpext_bf16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-NEXT: v_sub_f32_e32 v4, v0, v1 +; VI-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 +; VI-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 +; VI-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 +; VI-NEXT: v_rndne_f32_e32 v3, v2 +; VI-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 +; VI-NEXT: v_sub_f32_e32 v2, v2, v3 +; VI-NEXT: v_add_f32_e32 v1, v1, v4 +; VI-NEXT: v_add_f32_e32 v1, v2, v1 +; VI-NEXT: v_exp_f32_e32 v1, v1 +; VI-NEXT: v_cvt_i32_f32_e32 v2, v3 +; VI-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; VI-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; VI-NEXT: s_mov_b32 s4, 0x42b17218 +; VI-NEXT: v_ldexp_f32 v1, v1, v2 +; VI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; VI-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-NEXT: s_setpc_b64 s[30:31] ; -; SI-SDAG-LABEL: v_exp_f32_from_fpext_bf16: -; SI-SDAG: ; %bb.0: -; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 -; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f -; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 -; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 -; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; SI-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_exp_f32_from_fpext_bf16: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; GFX900-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; GFX900-NEXT: v_rndne_f32_e32 v2, v1 +; GFX900-NEXT: v_sub_f32_e32 v3, v1, v2 +; GFX900-NEXT: v_fma_f32 v1, v0, s4, -v1 +; GFX900-NEXT: s_mov_b32 s4, 0x32a5705f +; GFX900-NEXT: v_fma_f32 v1, v0, s4, v1 +; GFX900-NEXT: v_add_f32_e32 v1, v3, v1 +; GFX900-NEXT: v_exp_f32_e32 v1, v1 +; GFX900-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX900-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; GFX900-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; GFX900-NEXT: s_mov_b32 s4, 0x42b17218 +; GFX900-NEXT: v_ldexp_f32 v1, v1, v2 +; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; SI-GISEL-LABEL: v_exp_f32_from_fpext_bf16: -; SI-GISEL: ; %bb.0: -; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f -; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 -; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2 -; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3 -; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4 -; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 -; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 -; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc -; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-LABEL: v_exp_f32_from_fpext_bf16: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 +; SI-NEXT: s_mov_b32 s4, 0x3fb8aa3b +; SI-NEXT: v_rndne_f32_e32 v2, v1 +; SI-NEXT: v_sub_f32_e32 v3, v1, v2 +; SI-NEXT: v_fma_f32 v1, v0, s4, -v1 +; SI-NEXT: s_mov_b32 s4, 0x32a5705f +; SI-NEXT: v_fma_f32 v1, v0, s4, v1 +; SI-NEXT: v_add_f32_e32 v1, v3, v1 +; SI-NEXT: v_exp_f32_e32 v1, v1 +; SI-NEXT: v_cvt_i32_f32_e32 v2, v2 +; SI-NEXT: s_mov_b32 s4, 0xc2ce8ed0 +; SI-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 +; SI-NEXT: s_mov_b32 s4, 0x42b17218 +; SI-NEXT: v_ldexp_f32_e32 v1, v1, v2 +; SI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; SI-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 +; SI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_from_fpext_bf16: ; R600: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll index 0c49338bfcab9..942b742451dfe 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-SDAG,SI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-GISEL,SI-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-GISEL,SI-GISEL %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-SDAG,VI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-GISEL,VI-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-GISEL,VI-GISEL %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-SDAG,GFX900-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-GISEL,GFX900-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-GISEL,GFX900-GISEL %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s @@ -1992,26 +1992,19 @@ define float @v_exp2_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { } define float @v_exp2_f32_from_fpext_bf16(bfloat %src) { -; GCN-SDAG-LABEL: v_exp2_f32_from_fpext_bf16: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GCN-GISEL-LABEL: v_exp2_f32_from_fpext_bf16: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_exp2_f32_from_fpext_bf16: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GCN-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GCN-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GCN-NEXT: v_add_f32_e32 v0, v0, v2 +; GCN-NEXT: v_exp_f32_e32 v0, v0 +; GCN-NEXT: v_mov_b32_e32 v1, 0x1f800000 +; GCN-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GCN-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp2_f32_from_fpext_bf16: ; R600: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll index 528232a203acf..ab2618934c4d9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-SDAG,SI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-GISEL,SI-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-GISEL,SI-GISEL %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-SDAG,VI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-GISEL,VI-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-GISEL,VI-GISEL %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-SDAG,GFX900-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-GISEL,GFX900-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-GISEL,GFX900-GISEL %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-GISEL %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s @@ -6243,167 +6243,99 @@ define float @v_log_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { } define float @v_log_f32_from_fpext_bf16(bfloat %src) { -; SI-SDAG-LABEL: v_log_f32_from_fpext_bf16: -; SI-SDAG: ; %bb.0: -; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 -; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; SI-GISEL-LABEL: v_log_f32_from_fpext_bf16: -; SI-GISEL: ; %bb.0: -; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 -; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 -; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; SI-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; VI-SDAG-LABEL: v_log_f32_from_fpext_bf16: -; VI-SDAG: ; %bb.0: -; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 -; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 -; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 -; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; VI-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; VI-GISEL-LABEL: v_log_f32_from_fpext_bf16: -; VI-GISEL: ; %bb.0: -; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 -; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 -; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 -; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; VI-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-SDAG-LABEL: v_log_f32_from_fpext_bf16: -; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-GISEL-LABEL: v_log_f32_from_fpext_bf16: -; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 -; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-LABEL: v_log_f32_from_fpext_bf16: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: s_mov_b32 s4, 0x800000 +; SI-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-NEXT: v_log_f32_e32 v0, v0 +; SI-NEXT: s_mov_b32 s4, 0x3f317217 +; SI-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; SI-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-NEXT: s_mov_b32 s4, 0x3377d1cf +; SI-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; SI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: v_log_f32_from_fpext_bf16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: s_mov_b32 s4, 0x800000 +; VI-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-NEXT: v_log_f32_e32 v0, v0 +; VI-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2 +; VI-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2 +; VI-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 +; VI-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-SDAG-LABEL: v_log_f32_from_fpext_bf16: -; GFX1100-SDAG: ; %bb.0: -; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_log_f32_from_fpext_bf16: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-NEXT: v_log_f32_e32 v0, v0 +; GFX900-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX900-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-NEXT: s_mov_b32 s4, 0x3377d1cf +; GFX900-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-GISEL-LABEL: v_log_f32_from_fpext_bf16: -; GFX1100-GISEL: ; %bb.0: -; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 -; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo -; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: v_log_f32_from_fpext_bf16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 +; GFX1100-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log_f32_from_fpext_bf16: ; R600: ; %bb.0: @@ -7800,4 +7732,3 @@ attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memo ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GFX689-GISEL: {{.*}} ; GFX689-SDAG: {{.*}} -; SI: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll index 2e5bf2e560951..c732483733f5a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-SDAG,SI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-GISEL,SI-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-GISEL,SI-GISEL %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-SDAG,VI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-GISEL,VI-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-GISEL,VI-GISEL %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-SDAG,GFX900-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-GISEL,GFX900-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-GISEL,GFX900-GISEL %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-GISEL %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s @@ -6243,167 +6243,99 @@ define float @v_log10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { } define float @v_log10_f32_from_fpext_bf16(bfloat %src) { -; SI-SDAG-LABEL: v_log10_f32_from_fpext_bf16: -; SI-SDAG: ; %bb.0: -; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a -; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 -; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; SI-GISEL-LABEL: v_log10_f32_from_fpext_bf16: -; SI-GISEL: ; %bb.0: -; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 -; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 -; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 -; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; SI-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; VI-SDAG-LABEL: v_log10_f32_from_fpext_bf16: -; VI-SDAG: ; %bb.0: -; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 -; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2 -; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 -; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 -; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; VI-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; VI-GISEL-LABEL: v_log10_f32_from_fpext_bf16: -; VI-GISEL: ; %bb.0: -; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 -; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 -; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 -; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; VI-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-SDAG-LABEL: v_log10_f32_from_fpext_bf16: -; GFX900-SDAG: ; %bb.0: -; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf -; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 -; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX900-GISEL-LABEL: v_log10_f32_from_fpext_bf16: -; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4 -; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-LABEL: v_log10_f32_from_fpext_bf16: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: s_mov_b32 s4, 0x800000 +; SI-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-NEXT: v_log_f32_e32 v0, v0 +; SI-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; SI-NEXT: v_fma_f32 v2, v0, s4, -v1 +; SI-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; SI-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: v_log10_f32_from_fpext_bf16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: s_mov_b32 s4, 0x800000 +; VI-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; VI-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-NEXT: v_log_f32_e32 v0, v0 +; VI-NEXT: s_mov_b32 s4, 0x7f800000 +; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 +; VI-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2 +; VI-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2 +; VI-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 +; VI-NEXT: v_add_f32_e32 v2, v4, v2 +; VI-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-NEXT: v_add_f32_e32 v1, v1, v2 +; VI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; VI-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; VI-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-SDAG-LABEL: v_log10_f32_from_fpext_bf16: -; GFX1100-SDAG: ; %bb.0: -; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_log10_f32_from_fpext_bf16: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-NEXT: v_log_f32_e32 v0, v0 +; GFX900-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX900-NEXT: v_fma_f32 v2, v0, s4, -v1 +; GFX900-NEXT: s_mov_b32 s4, 0x3284fbcf +; GFX900-NEXT: v_fma_f32 v2, v0, s4, v2 +; GFX900-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX900-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 +; GFX900-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] +; GFX900-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-GISEL-LABEL: v_log10_f32_from_fpext_bf16: -; GFX1100-GISEL: ; %bb.0: -; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 -; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo -; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: v_log10_f32_from_fpext_bf16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log10_f32_from_fpext_bf16: ; R600: ; %bb.0: @@ -7800,4 +7732,3 @@ attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memo ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GFX689-GISEL: {{.*}} ; GFX689-SDAG: {{.*}} -; SI: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll index d499e017e92f4..4c4b678010adf 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-SDAG,SI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-GISEL,SI-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-GISEL,SI-GISEL %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-SDAG,VI-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-GISEL,VI-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-GISEL,VI-GISEL %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-SDAG,GFX900-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-GISEL,GFX900-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-GISEL,GFX900-GISEL %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-SDAG %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-GISEL %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-GISEL %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s @@ -2751,47 +2751,32 @@ define float @v_log2_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { } define float @v_log2_f32_from_fpext_bf16(bfloat %src) { -; GFX689-SDAG-LABEL: v_log2_f32_from_fpext_bf16: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX689-GISEL-LABEL: v_log2_f32_from_fpext_bf16: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-SDAG-LABEL: v_log2_f32_from_fpext_bf16: -; GFX1100-SDAG: ; %bb.0: -; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX689-LABEL: v_log2_f32_from_fpext_bf16: +; GFX689: ; %bb.0: +; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX689-NEXT: s_mov_b32 s4, 0x800000 +; GFX689-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX689-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GFX689-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; GFX689-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX689-NEXT: v_log_f32_e32 v0, v0 +; GFX689-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX689-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX689-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX689-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-GISEL-LABEL: v_log2_f32_from_fpext_bf16: -; GFX1100-GISEL: ; %bb.0: -; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: v_log2_f32_from_fpext_bf16: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_from_fpext_bf16: ; R600: ; %bb.0: