Skip to content

[ARM] Add tan intrinsic lowering #95439

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FTAN, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
Expand Down Expand Up @@ -875,6 +876,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
setOperationAction(ISD::FTAN, MVT::v2f64, Expand);
setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
Expand All @@ -897,6 +899,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
setOperationAction(ISD::FTAN, MVT::v4f32, Expand);
setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
Expand All @@ -914,6 +917,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
setOperationAction(ISD::FTAN, MVT::v2f32, Expand);
setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
Expand Down Expand Up @@ -1540,6 +1544,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
setOperationAction(ISD::FTAN, MVT::f16, Promote);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
Expand Down
31 changes: 31 additions & 0 deletions llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,37 @@ L.entry:

declare <4 x float> @llvm.cos.v4f32(<4 x float>) nounwind readonly

; Vector tangent on <4 x float>: loads a vector from the global @A (defined
; outside this excerpt), applies llvm.tan.v4f32 and stores the result to %X.
; The FileCheck directives below expect the address of the global to be
; materialized with a movw/movt pair, the vector to be loaded with vld1.64,
; four separate calls to tanf (each preceded by a move into r0, i.e. one
; call per lane), and the result to be written back with vst1.64.
define void @test_tan(ptr %X) nounwind {

; CHECK-LABEL: test_tan:

; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vld1.64

; CHECK: {{v?mov(.32)?}} r0,
; CHECK: bl {{.*}}tanf

; CHECK: {{v?mov(.32)?}} r0,
; CHECK: bl {{.*}}tanf

; CHECK: {{v?mov(.32)?}} r0,
; CHECK: bl {{.*}}tanf

; CHECK: {{v?mov(.32)?}} r0,
; CHECK: bl {{.*}}tanf

; CHECK: vst1.64

L.entry:
; Load -> intrinsic call -> store; the 16-byte alignment is stated explicitly
; on both memory operations.
%0 = load <4 x float>, ptr @A, align 16
%1 = call <4 x float> @llvm.tan.v4f32(<4 x float> %0)
store <4 x float> %1, ptr %X, align 16
ret void
}

declare <4 x float> @llvm.tan.v4f32(<4 x float>) nounwind readonly

define void @test_exp(ptr %X) nounwind {

; CHECK-LABEL: test_exp:
Expand Down
18 changes: 18 additions & 0 deletions llvm/test/CodeGen/ARM/fp16-fullfp16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,23 @@ define void @test_cos(ptr %p) {
ret void
}

; Scalar half-precision tangent: loads a half from %p, applies llvm.tan.f16
; and stores the result back to the same pointer.
; The expected assembly widens the value to f32 (vcvtb.f32.f16), calls tanf,
; and narrows the result back to f16 (vcvtb.f16.f32). The incoming pointer is
; copied from r0 to r4 before the call and the final store goes through r4.
define void @test_tan(ptr %p) {
; CHECK-LABEL: test_tan:
; CHECK: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: vldr.16 s0, [r0]
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-NEXT: bl tanf
; CHECK-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-NEXT: vstr.16 s0, [r4]
; CHECK-NEXT: pop {r4, pc}
%a = load half, ptr %p, align 2
%r = call half @llvm.tan.f16(half %a)
store half %r, ptr %p
ret void
}

define void @test_pow(ptr %p, ptr %q) {
; CHECK-LABEL: test_pow:
; CHECK: .save {r4, lr}
Expand Down Expand Up @@ -588,6 +605,7 @@ declare half @llvm.sqrt.f16(half %a)
declare half @llvm.powi.f16.i32(half %a, i32 %b)
declare half @llvm.sin.f16(half %a)
declare half @llvm.cos.f16(half %a)
declare half @llvm.tan.f16(half %a)
declare half @llvm.pow.f16(half %a, half %b)
declare half @llvm.exp.f16(half %a)
declare half @llvm.exp2.f16(half %a)
Expand Down
16 changes: 16 additions & 0 deletions llvm/test/CodeGen/ARM/fp16-promote.ll
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ declare half @llvm.sqrt.f16(half %a) #0
declare half @llvm.powi.f16.i32(half %a, i32 %b) #0
declare half @llvm.sin.f16(half %a) #0
declare half @llvm.cos.f16(half %a) #0
declare half @llvm.tan.f16(half %a) #0
declare half @llvm.pow.f16(half %a, half %b) #0
declare half @llvm.exp.f16(half %a) #0
declare half @llvm.exp2.f16(half %a) #0
Expand Down Expand Up @@ -472,6 +473,21 @@ define void @test_cos(ptr %p) #0 {
ret void
}

; Half-precision tangent via promotion to float. Two sets of expectations
; follow, one per check prefix (the RUN lines that select the prefixes are
; outside this excerpt):
;  - the FP16 prefix expects vcvtb instructions around the call to tanf;
;  - the LIBCALL prefix expects the conversions themselves to be calls
;    (__aeabi_h2f before and __aeabi_f2h after the call to tanf).
; CHECK-FP16-LABEL: test_tan:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl tanf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_tan:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl tanf
; CHECK-LIBCALL: bl __aeabi_f2h
; Attribute group #0 is defined elsewhere in the test file.
define void @test_tan(ptr %p) #0 {
%a = load half, ptr %p, align 2
%r = call half @llvm.tan.f16(half %a)
store half %r, ptr %p
ret void
}

; CHECK-FP16-LABEL: test_pow:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
Expand Down
21 changes: 21 additions & 0 deletions llvm/test/CodeGen/ARM/vfloatintrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ define %v2f32 @test_v2f32.cos(%v2f32 %a) {
%1 = call %v2f32 @llvm.cos.v2f32(%v2f32 %a)
ret %v2f32 %1
}
; Smoke test for llvm.tan on the %v2f32 type (type alias declared elsewhere
; in the file; presumably <2 x float> -- confirm against the alias). The only
; expectation is that the string "tan" appears in the function's output.
; CHECK-LABEL: test_v2f32.tan:{{.*}}
define %v2f32 @test_v2f32.tan(%v2f32 %a) {
; CHECK: tan
%1 = call %v2f32 @llvm.tan.v2f32(%v2f32 %a)
ret %v2f32 %1
}
; CHECK-LABEL: test_v2f32.pow:{{.*}}
define %v2f32 @test_v2f32.pow(%v2f32 %a, %v2f32 %b) {
; CHECK: pow
Expand Down Expand Up @@ -112,6 +118,7 @@ declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0
declare %v2f32 @llvm.powi.v2f32.i32(%v2f32, i32) #0
declare %v2f32 @llvm.sin.v2f32(%v2f32) #0
declare %v2f32 @llvm.cos.v2f32(%v2f32) #0
declare %v2f32 @llvm.tan.v2f32(%v2f32) #0
declare %v2f32 @llvm.pow.v2f32(%v2f32, %v2f32) #0
declare %v2f32 @llvm.exp.v2f32(%v2f32) #0
declare %v2f32 @llvm.exp2.v2f32(%v2f32) #0
Expand Down Expand Up @@ -153,6 +160,12 @@ define %v4f32 @test_v4f32.cos(%v4f32 %a) {
%1 = call %v4f32 @llvm.cos.v4f32(%v4f32 %a)
ret %v4f32 %1
}
; Smoke test for llvm.tan on the %v4f32 type (type alias declared elsewhere
; in the file; presumably <4 x float> -- confirm against the alias). The only
; expectation is that the string "tan" appears in the function's output.
; CHECK-LABEL: test_v4f32.tan:{{.*}}
define %v4f32 @test_v4f32.tan(%v4f32 %a) {
; CHECK: tan
%1 = call %v4f32 @llvm.tan.v4f32(%v4f32 %a)
ret %v4f32 %1
}
; CHECK-LABEL: test_v4f32.pow:{{.*}}
define %v4f32 @test_v4f32.pow(%v4f32 %a, %v4f32 %b) {
; CHECK: pow
Expand Down Expand Up @@ -236,6 +249,7 @@ declare %v4f32 @llvm.sqrt.v4f32(%v4f32) #0
declare %v4f32 @llvm.powi.v4f32.i32(%v4f32, i32) #0
declare %v4f32 @llvm.sin.v4f32(%v4f32) #0
declare %v4f32 @llvm.cos.v4f32(%v4f32) #0
declare %v4f32 @llvm.tan.v4f32(%v4f32) #0
declare %v4f32 @llvm.pow.v4f32(%v4f32, %v4f32) #0
declare %v4f32 @llvm.exp.v4f32(%v4f32) #0
declare %v4f32 @llvm.exp2.v4f32(%v4f32) #0
Expand Down Expand Up @@ -277,6 +291,12 @@ define %v2f64 @test_v2f64.cos(%v2f64 %a) {
%1 = call %v2f64 @llvm.cos.v2f64(%v2f64 %a)
ret %v2f64 %1
}
; Smoke test for llvm.tan on the %v2f64 type (type alias declared elsewhere
; in the file; presumably <2 x double> -- confirm against the alias). The only
; expectation is that the string "tan" appears in the function's output.
; CHECK-LABEL: test_v2f64.tan:{{.*}}
define %v2f64 @test_v2f64.tan(%v2f64 %a) {
; CHECK: tan
%1 = call %v2f64 @llvm.tan.v2f64(%v2f64 %a)
ret %v2f64 %1
}
; CHECK-LABEL: test_v2f64.pow:{{.*}}
define %v2f64 @test_v2f64.pow(%v2f64 %a, %v2f64 %b) {
; CHECK: pow
Expand Down Expand Up @@ -361,6 +381,7 @@ declare %v2f64 @llvm.sqrt.v2f64(%v2f64) #0
declare %v2f64 @llvm.powi.v2f64.i32(%v2f64, i32) #0
declare %v2f64 @llvm.sin.v2f64(%v2f64) #0
declare %v2f64 @llvm.cos.v2f64(%v2f64) #0
declare %v2f64 @llvm.tan.v2f64(%v2f64) #0
declare %v2f64 @llvm.pow.v2f64(%v2f64, %v2f64) #0
declare %v2f64 @llvm.exp.v2f64(%v2f64) #0
declare %v2f64 @llvm.exp2.v2f64(%v2f64) #0
Expand Down
9 changes: 9 additions & 0 deletions llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,15 @@ define double @cos_d(double %a) {
ret double %1
}

; Scalar double-precision tangent: the intrinsic result is returned directly,
; so the call is the last operation in the function.
; Expectations differ per check prefix (the RUN lines that select them are
; outside this excerpt): the SOFT prefix accepts either `bl tan` or `b tan`,
; while the HARD prefix requires a plain branch `b tan`.
declare double @llvm.tan.f64(double %Val)
define double @tan_d(double %a) {
; CHECK-LABEL: tan_d:
; SOFT: {{(bl|b)}} tan
; HARD: b tan
%1 = call double @llvm.tan.f64(double %a)
ret double %1
}

declare double @llvm.pow.f64(double %Val, double %power)
define double @pow_d(double %a, double %b) {
; CHECK-LABEL: pow_d:
Expand Down
9 changes: 9 additions & 0 deletions llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ define float @cos_f(float %a) {
ret float %1
}

; Scalar single-precision tangent: the intrinsic result is returned directly,
; so the call is the last operation in the function.
; Expectations differ per check prefix (the RUN lines that select them are
; outside this excerpt): the SOFT prefix requires `bl tanf`, the HARD prefix
; requires a plain branch `b tanf`.
declare float @llvm.tan.f32(float %Val)
define float @tan_f(float %a) {
; CHECK-LABEL: tan_f:
; SOFT: bl tanf
; HARD: b tanf
%1 = call float @llvm.tan.f32(float %a)
ret float %1
}

declare float @llvm.pow.f32(float %Val, float %power)
define float @pow_f(float %a, float %b) {
; CHECK-LABEL: pow_f:
Expand Down
111 changes: 111 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-fmath.ll
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,117 @@ entry:
ret <2 x double> %0
}

; Fast-math llvm.tan on <4 x float> under the arm_aapcs_vfpcc calling
; convention. The exact expected sequence below shows the operation being
; split into four calls to tanf: the input is saved in q4, each lane is
; handed to tanf in r0, and each result is moved from r0 back into one of
; s16..s19 before q4 is copied to q0 for the return.
; NOTE(review): the assertion lines look auto-generated; regenerate them with
; the project's update script rather than editing by hand -- confirm against
; the header of the test file.
define arm_aapcs_vfpcc <4 x float> @tan_float32_t(<4 x float> %src) {
; CHECK-LABEL: tan_float32_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r4, d9
; CHECK-NEXT: bl tanf
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl tanf
; CHECK-NEXT: vmov r4, r1, d8
; CHECK-NEXT: vmov s19, r0
; CHECK-NEXT: vmov s18, r5
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bl tanf
; CHECK-NEXT: vmov s17, r0
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl tanf
; CHECK-NEXT: vmov s16, r0
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
%0 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %src)
ret <4 x float> %0
}

; Fast-math llvm.tan on <8 x half> under the arm_aapcs_vfpcc calling
; convention. The exact expected sequence below shows eight calls to tanf,
; one per lane: the input is saved in q4, each half lane is widened to f32
; with vcvtb/vcvtt.f32.f16 (bottom/top half of s16..s19), passed to tanf in
; r0, and the result is narrowed with vcvtb/vcvtt.f16.f32 into s20..s23.
; The packed result is finally copied from q5 to q0 for the return.
; NOTE(review): the assertion lines look auto-generated; regenerate them with
; the project's update script rather than editing by hand -- confirm against
; the header of the test file.
define arm_aapcs_vfpcc <8 x half> @tan_float16_t(<8 x half> %src) {
; CHECK-LABEL: tan_float16_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vcvtb.f32.f16 s0, s16
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bl tanf
; CHECK-NEXT: vcvtt.f32.f16 s0, s16
; CHECK-NEXT: vmov s16, r0
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bl tanf
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vcvtb.f16.f32 s20, s16
; CHECK-NEXT: vcvtt.f16.f32 s20, s0
; CHECK-NEXT: vcvtb.f32.f16 s0, s17
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bl tanf
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vcvtb.f16.f32 s21, s0
; CHECK-NEXT: vcvtt.f32.f16 s0, s17
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bl tanf
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vcvtt.f16.f32 s21, s0
; CHECK-NEXT: vcvtb.f32.f16 s0, s18
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bl tanf
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vcvtb.f16.f32 s22, s0
; CHECK-NEXT: vcvtt.f32.f16 s0, s18
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bl tanf
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vcvtt.f16.f32 s22, s0
; CHECK-NEXT: vcvtb.f32.f16 s0, s19
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bl tanf
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vcvtb.f16.f32 s23, s0
; CHECK-NEXT: vcvtt.f32.f16 s0, s19
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bl tanf
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vcvtt.f16.f32 s23, s0
; CHECK-NEXT: vmov q0, q5
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r7, pc}
entry:
%0 = call fast <8 x half> @llvm.tan.v8f16(<8 x half> %src)
ret <8 x half> %0
}

; Fast-math llvm.tan on <2 x double> under the arm_aapcs_vfpcc calling
; convention. The exact expected sequence below shows two calls to tan, one
; per lane: the input is saved in q4, each double is passed in the r0/r1
; register pair, and each result is moved from r0/r1 back into d9 and d8
; respectively before q4 is copied to q0 for the return.
; NOTE(review): the assertion lines look auto-generated; regenerate them with
; the project's update script rather than editing by hand -- confirm against
; the header of the test file.
define arm_aapcs_vfpcc <2 x double> @tan_float64_t(<2 x double> %src) {
; CHECK-LABEL: tan_float64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl tan
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: bl tan
; CHECK-NEXT: vmov d8, r0, r1
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r7, pc}
entry:
%0 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %src)
ret <2 x double> %0
}

define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) {
; CHECK-LABEL: exp_float32_t:
; CHECK: @ %bb.0: @ %entry
Expand Down
Loading