diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/bitcast-fold-lane-ops.ll b/llvm/test/Transforms/InstCombine/AMDGPU/bitcast-fold-lane-ops.ll
new file mode 100644
index 0000000000000..d4dae239b1e7d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/bitcast-fold-lane-ops.ll
@@ -0,0 +1,315 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S < %s | FileCheck %s
+
+define i32 @test_bitcast_f32_to_i32_readfirstlane(float %val) {
+; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_readfirstlane(
+; CHECK-SAME: float [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+  %bitcast = bitcast float %val to i32
+  %result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast) ; base case: the lane op is the only user of %bitcast
+  ret i32 %result
+}
+
+define i32 @test_bitcast_f32_to_i32_readfirstlane_multi_use_store(float %val, ptr %use.ptr) {
+; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_readfirstlane_multi_use_store(
+; CHECK-SAME: float [[VAL:%.*]], ptr [[USE_PTR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
+; CHECK-NEXT:    store float [[VAL]], ptr [[USE_PTR]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+  %bitcast = bitcast float %val to i32
+  store i32 %bitcast, ptr %use.ptr ; second use of %bitcast: a store (the expected output stores the original float instead)
+  %result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast)
+  ret i32 %result
+}
+
+declare void @use.i32(i32) ; external declaration; gives %bitcast a second use in the _multi_use_call test below
+
+define i32 @test_bitcast_f32_to_i32_readfirstlane_multi_use_call(float %val) {
+; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_readfirstlane_multi_use_call(
+; CHECK-SAME: float [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
+; CHECK-NEXT:    call void @use.i32(i32 [[BITCAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+  %bitcast = bitcast float %val to i32
+  call void @use.i32(i32 %bitcast) ; second use of %bitcast: a call to an external function
+  %result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast)
+  ret i32 %result
+}
+
+define float @test_bitcast_f32_to_i32_readfirstlane_bitcast(float %val) {
+; CHECK-LABEL: define float @test_bitcast_f32_to_i32_readfirstlane_bitcast(
+; CHECK-SAME: float [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
+; CHECK-NEXT:    [[RESULT:%.*]] = bitcast i32 [[CALL]] to float
+; CHECK-NEXT:    ret float [[RESULT]]
+;
+  %bitcast = bitcast float %val to i32
+  %call = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast)
+  %result = bitcast i32 %call to float ; lane-op result is cast back to the original source type
+  ret float %result
+}
+
+define i32 @test_bitcast_v2f16_to_i32_readfirstlane(<2 x half> %val) {
+; CHECK-LABEL: define i32 @test_bitcast_v2f16_to_i32_readfirstlane(
+; CHECK-SAME: <2 x half> [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast <2 x half> [[VAL]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+  %bitcast = bitcast <2 x half> %val to i32 ; vector source, scalar destination of the same total width
+  %result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast)
+  ret i32 %result
+}
+
+define i32 @test_bitcast_v2bf16_to_i32_readfirstlane(<2 x bfloat> %val) {
+; CHECK-LABEL: define i32 @test_bitcast_v2bf16_to_i32_readfirstlane(
+; CHECK-SAME: <2 x bfloat> [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast <2 x bfloat> [[VAL]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+  %bitcast = bitcast <2 x bfloat> %val to i32
+  %result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast)
+  ret i32 %result
+}
+
+define i64 @test_bitcast_f64_to_i64_readfirstlane(double %val) {
+; CHECK-LABEL: define i64 @test_bitcast_f64_to_i64_readfirstlane(
+; CHECK-SAME: double [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast double [[VAL]] to i64
+; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.amdgcn.readfirstlane.i64(i64 [[BITCAST]])
+; CHECK-NEXT:    ret i64 [[RESULT]]
+;
+  %bitcast = bitcast double %val to i64 ; 64-bit scalar variant
+  %result = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %bitcast)
+  ret i64 %result
+}
+
+define <2 x i32> @test_bitcast_f64_to_v2i32_readfirstlane(double %val) {
+; CHECK-LABEL: define <2 x i32> @test_bitcast_f64_to_v2i32_readfirstlane(
+; CHECK-SAME: double [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast double [[VAL]] to <2 x i32>
+; CHECK-NEXT:    [[RESULT:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[BITCAST]])
+; CHECK-NEXT:    ret <2 x i32> [[RESULT]]
+;
+  %bitcast = bitcast double %val to <2 x i32> ; scalar source, vector destination of the same total width
+  %result = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %bitcast)
+  ret <2 x i32> %result
+}
+
+define i64 @test_bitcast_v4i16_to_i64_readfirstlane(<4 x i16> %val) {
+; CHECK-LABEL: define i64 @test_bitcast_v4i16_to_i64_readfirstlane(
+; CHECK-SAME: <4 x i16> [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast <4 x i16> [[VAL]] to i64
+; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.amdgcn.readfirstlane.i64(i64 [[BITCAST]])
+; CHECK-NEXT:    ret i64 [[RESULT]]
+;
+  %bitcast = bitcast <4 x i16> %val to i64
+  %result = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %bitcast)
+  ret i64 %result
+}
+
+
+define i32 @test_bitcast_v4i8_to_i32_readfirstlane(<4 x i8> %val) {
+; CHECK-LABEL: define i32 @test_bitcast_v4i8_to_i32_readfirstlane(
+; CHECK-SAME: <4 x i8> [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast <4 x i8> [[VAL]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+  %bitcast = bitcast <4 x i8> %val to i32 ; source vector with 8-bit elements
+  %result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast)
+  ret i32 %result
+}
+
+define i32 @test_bitcast_v8i4_to_i32_readfirstlane(<8 x i4> %val) {
+; CHECK-LABEL: define i32 @test_bitcast_v8i4_to_i32_readfirstlane(
+; CHECK-SAME: <8 x i4> [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast <8 x i4> [[VAL]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+  %bitcast = bitcast <8 x i4> %val to i32 ; source vector with sub-byte (4-bit) elements
+  %result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast)
+  ret i32 %result
+}
+
+define float @test_bitcast_i32_to_f32_readfirstlane(i32 %val) {
+; CHECK-LABEL: define float @test_bitcast_i32_to_f32_readfirstlane(
+; CHECK-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast i32 [[VAL]] to float
+; CHECK-NEXT:    [[RESULT:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[BITCAST]])
+; CHECK-NEXT:    ret float [[RESULT]]
+;
+  %bitcast = bitcast i32 %val to float ; opposite direction: integer source, floating-point lane op
+  %result = call float @llvm.amdgcn.readfirstlane.f32(float %bitcast)
+  ret float %result
+}
+
+define i16 @test_bitcast_f16_to_i16_readfirstlane(half %val) {
+; CHECK-LABEL: define i16 @test_bitcast_f16_to_i16_readfirstlane(
+; CHECK-SAME: half [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast half [[VAL]] to i16
+; CHECK-NEXT:    [[RESULT:%.*]] = call i16 @llvm.amdgcn.readfirstlane.i16(i16 [[BITCAST]])
+; CHECK-NEXT:    ret i16 [[RESULT]]
+;
+  %bitcast = bitcast half %val to i16 ; 16-bit scalar variant
+  %result = call i16 @llvm.amdgcn.readfirstlane.i16(i16 %bitcast)
+  ret i16 %result
+}
+
+define i16 @test_bitcast_v2i8_to_i16_readfirstlane(<2 x i8> %val) {
+; CHECK-LABEL: define i16 @test_bitcast_v2i8_to_i16_readfirstlane(
+; CHECK-SAME: <2 x i8> [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast <2 x i8> [[VAL]] to i16
+; CHECK-NEXT:    [[RESULT:%.*]] = call i16 @llvm.amdgcn.readfirstlane.i16(i16 [[BITCAST]])
+; CHECK-NEXT:    ret i16 [[RESULT]]
+;
+  %bitcast = bitcast <2 x i8> %val to i16
+  %result = call i16 @llvm.amdgcn.readfirstlane.i16(i16 %bitcast)
+  ret i16 %result
+}
+
+define <16 x i32> @test_bitcast_v16f32_to_v16i32_readfirstlane(<16 x float> %val) {
+; CHECK-LABEL: define <16 x i32> @test_bitcast_v16f32_to_v16i32_readfirstlane(
+; CHECK-SAME: <16 x float> [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast <16 x float> [[VAL]] to <16 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i32> @llvm.amdgcn.readfirstlane.v16i32(<16 x i32> [[BITCAST]])
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %bitcast = bitcast <16 x float> %val to <16 x i32> ; 16-element vector, element width unchanged
+  %result = call <16 x i32> @llvm.amdgcn.readfirstlane.v16i32(<16 x i32> %bitcast)
+  ret <16 x i32> %result
+}
+
+define <8 x i64> @test_bitcast_v16f32_to_v8i64_readfirstlane(<16 x float> %val) {
+; CHECK-LABEL: define <8 x i64> @test_bitcast_v16f32_to_v8i64_readfirstlane(
+; CHECK-SAME: <16 x float> [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast <16 x float> [[VAL]] to <8 x i64>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.amdgcn.readfirstlane.v8i64(<8 x i64> [[BITCAST]])
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %bitcast = bitcast <16 x float> %val to <8 x i64> ; same source vector, element count and width both change
+  %result = call <8 x i64> @llvm.amdgcn.readfirstlane.v8i64(<8 x i64> %bitcast)
+  ret <8 x i64> %result
+}
+
+define i32 @test_bitcast_f32_to_i32_readlane(float %val, i32 inreg %lane.index) {
+; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_readlane(
+; CHECK-SAME: float [[VAL:%.*]], i32 inreg [[LANE_INDEX:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[BITCAST]], i32 [[LANE_INDEX]])
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+  %bitcast = bitcast float %val to i32
+  %result = call i32 @llvm.amdgcn.readlane.i32(i32 %bitcast, i32 %lane.index) ; readlane: only the value operand is a bitcast; the lane index is a plain argument
+  ret i32 %result
+}
+
+define i32 @test_bitcast_f32_to_i32_writelane_samesourcetype(float %val0, i32 inreg %lane.index, float %val1) {
+; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_writelane_samesourcetype(
+; CHECK-SAME: float [[VAL0:%.*]], i32 inreg [[LANE_INDEX:%.*]], float [[VAL1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST0:%.*]] = bitcast float [[VAL0]] to i32
+; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast float [[VAL1]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[BITCAST0]], i32 [[LANE_INDEX]], i32 [[BITCAST1]])
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+  %bitcast0 = bitcast float %val0 to i32
+  %bitcast1 = bitcast float %val1 to i32 ; both value operands are bitcasts from the same source type (float)
+  %result = call i32 @llvm.amdgcn.writelane.i32(i32 %bitcast0, i32 %lane.index, i32 %bitcast1)
+  ret i32 %result
+}
+
+define i32 @test_bitcast_f32_to_i32_writelane_diffsourcetype(float %val0, i32 inreg %lane.index, <2 x half> %val1) {
+; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_writelane_diffsourcetype(
+; CHECK-SAME: float [[VAL0:%.*]], i32 inreg [[LANE_INDEX:%.*]], <2 x half> [[VAL1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST0:%.*]] = bitcast float [[VAL0]] to i32
+; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast <2 x half> [[VAL1]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[BITCAST0]], i32 [[LANE_INDEX]], i32 [[BITCAST1]])
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+  %bitcast0 = bitcast float %val0 to i32
+  %bitcast1 = bitcast <2 x half> %val1 to i32 ; value operands are bitcasts from different source types (float vs <2 x half>)
+  %result = call i32 @llvm.amdgcn.writelane.i32(i32 %bitcast0, i32 %lane.index, i32 %bitcast1)
+  ret i32 %result
+}
+
+define i32 @test_bitcast_update_dpp_f32_to_i32(float %val0, float %val1) {
+; CHECK-LABEL: define i32 @test_bitcast_update_dpp_f32_to_i32(
+; CHECK-SAME: float [[VAL0:%.*]], float [[VAL1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST0:%.*]] = bitcast float [[VAL0]] to i32
+; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast float [[VAL1]] to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 [[BITCAST0]], i32 [[BITCAST1]], i32 1, i32 1, i32 1, i1 false)
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+  %bitcast0 = bitcast float %val0 to i32
+  %bitcast1 = bitcast float %val1 to i32 ; both update.dpp value operands are bitcasts from float
+  %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 %bitcast0, i32 %bitcast1, i32 1, i32 1, i32 1, i1 false)
+  ret i32 %dpp
+}
+
+define i32 @test_bitcast_update_dpp_f32_to_i32_convergencetoken(float %val0, float %val1) convergent {
+; CHECK-LABEL: define i32 @test_bitcast_update_dpp_f32_to_i32_convergencetoken(
+; CHECK-SAME: float [[VAL0:%.*]], float [[VAL1:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[T:%.*]] = call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT:    [[BITCAST0:%.*]] = bitcast float [[VAL0]] to i32
+; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast float [[VAL1]] to i32
+; CHECK-NEXT:    [[DPP:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 [[BITCAST0]], i32 [[BITCAST1]], i32 1, i32 1, i32 1, i1 false) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT:    ret i32 [[DPP]]
+;
+  %t = call token @llvm.experimental.convergence.entry() ; token consumed by the "convergencectrl" bundle on the update.dpp call below
+  %bitcast0 = bitcast float %val0 to i32
+  %bitcast1 = bitcast float %val1 to i32
+  %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 %bitcast0, i32 %bitcast1, i32 1, i32 1, i32 1, i1 false) [ "convergencectrl"(token %t) ]
+  ret i32 %dpp
+}
+
+define i32 @test_bitcast_update_dpp_sources_different_type(float %val0, <2 x half> %val1) {
+; CHECK-LABEL: define i32 @test_bitcast_update_dpp_sources_different_type(
+; CHECK-SAME: float [[VAL0:%.*]], <2 x half> [[VAL1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[BITCAST0:%.*]] = bitcast float [[VAL0]] to i32
+; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast <2 x half> [[VAL1]] to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 [[BITCAST0]], i32 [[BITCAST1]], i32 1, i32 1, i32 1, i1 false)
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+  %bitcast0 = bitcast float %val0 to i32
+  %bitcast1 = bitcast <2 x half> %val1 to i32 ; update.dpp value operands are bitcasts from different source types
+  %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 %bitcast0, i32 %bitcast1, i32 1, i32 1, i32 1, i1 false)
+  ret i32 %dpp
+}
+
+define i32 @test_bitcast_f32_to_i32_readfirstlane_convergencetoken(float %val) convergent {
+; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_readfirstlane_convergencetoken(
+; CHECK-SAME: float [[VAL:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[T:%.*]] = call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]]) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+  %t = call token @llvm.experimental.convergence.entry() ; token consumed by the "convergencectrl" bundle on the readfirstlane call below
+  %bitcast = bitcast float %val to i32
+  %result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast) [ "convergencectrl"(token %t) ]
+  ret i32 %result
+}
+
+define i32 @test_bitcast_f32_to_i32_readlane_convergencetoken(float %val, i32 inreg %lane.index) convergent {
+; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_readlane_convergencetoken(
+; CHECK-SAME: float [[VAL:%.*]], i32 inreg [[LANE_INDEX:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[T:%.*]] = call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[BITCAST]], i32 [[LANE_INDEX]]) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+  %t = call token @llvm.experimental.convergence.entry() ; token consumed by the "convergencectrl" bundle on the readlane call below
+  %bitcast = bitcast float %val to i32
+  %result = call i32 @llvm.amdgcn.readlane.i32(i32 %bitcast, i32 %lane.index) [ "convergencectrl"(token %t) ]
+  ret i32 %result
+}
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/permlane64.ll b/llvm/test/Transforms/InstCombine/AMDGPU/permlane64.ll
index 12506d17ef3d9..3908f0b778508 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/permlane64.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/permlane64.ll
@@ -6,8 +6,32 @@ define i32 @test_constant() {
 ; CHECK-LABEL: @test_constant(
 ; CHECK-NEXT:    ret i32 99
 ;
-  %call = call i32 @llvm.amdgcn.permlane64(i32 99)
+  %call = call i32 @llvm.amdgcn.permlane64.i32(i32 99)
   ret i32 %call
 }
 
-declare i32 @llvm.amdgcn.permlane64(i32)
+define i32 @test_bitcast_f32_to_i32_permlane64(float %val) {
+; CHECK-LABEL: @test_bitcast_f32_to_i32_permlane64(
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast float [[VAL:%.*]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.amdgcn.permlane64.i32(i32 [[BITCAST]])
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+  %bitcast = bitcast float %val to i32
+  %result = call i32 @llvm.amdgcn.permlane64.i32(i32 %bitcast) ; permlane64 fed directly by a float -> i32 bitcast
+  ret i32 %result
+}
+
+define i32 @test_bitcast_f32_to_i32_permlane64_convergencetoken(float %val) convergent {
+; CHECK-LABEL: @test_bitcast_f32_to_i32_permlane64_convergencetoken(
+; CHECK-NEXT:    [[T:%.*]] = call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast float [[VAL:%.*]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.amdgcn.permlane64.i32(i32 [[BITCAST]]) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+  %t = call token @llvm.experimental.convergence.entry() ; token consumed by the "convergencectrl" bundle on the permlane64 call below
+  %bitcast = bitcast float %val to i32
+  %result = call i32 @llvm.amdgcn.permlane64.i32(i32 %bitcast) [ "convergencectrl"(token %t) ]
+  ret i32 %result
+}
+
+declare i32 @llvm.amdgcn.permlane64.i32(i32)