-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[ARM] Add tan intrinsic lowering #95439
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
- `ARMISelLowering.cpp` - Add f16 type and NEON and MVE vector support for tan
@llvm/pr-subscribers-backend-arm Author: Farzon Lotfi (farzonl) Changes
Full diff: https://github.com/llvm/llvm-project/pull/95439.diff 8 Files Affected:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index e3270471981cc..6faa02ec17aac 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -365,6 +365,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FTAN, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
@@ -875,6 +876,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
+ setOperationAction(ISD::FTAN, MVT::v2f64, Expand);
setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
@@ -897,6 +899,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
+ setOperationAction(ISD::FTAN, MVT::v4f32, Expand);
setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
@@ -914,6 +917,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
+ setOperationAction(ISD::FTAN, MVT::v2f32, Expand);
setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
@@ -1540,6 +1544,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FTAN, MVT::f16, Promote);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
diff --git a/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
index e14e598086249..b6ebeaae5eb6d 100644
--- a/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
+++ b/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -56,6 +56,37 @@ L.entry:
declare <4 x float> @llvm.cos.v4f32(<4 x float>) nounwind readonly
+define void @test_tan(ptr %X) nounwind {
+
+; CHECK-LABEL: test_tan:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vld1.64
+
+; CHECK: {{v?mov(.32)?}} r0,
+; CHECK: bl {{.*}}tanf
+
+; CHECK: {{v?mov(.32)?}} r0,
+; CHECK: bl {{.*}}tanf
+
+; CHECK: {{v?mov(.32)?}} r0,
+; CHECK: bl {{.*}}tanf
+
+; CHECK: {{v?mov(.32)?}} r0,
+; CHECK: bl {{.*}}tanf
+
+; CHECK: vst1.64
+
+L.entry:
+ %0 = load <4 x float>, ptr @A, align 16
+ %1 = call <4 x float> @llvm.tan.v4f32(<4 x float> %0)
+ store <4 x float> %1, ptr %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.tan.v4f32(<4 x float>) nounwind readonly
+
define void @test_exp(ptr %X) nounwind {
; CHECK-LABEL: test_exp:
diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
index 7381d517505e8..2656cdbb0347e 100644
--- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
@@ -281,6 +281,23 @@ define void @test_cos(ptr %p) {
ret void
}
+define void @test_tan(ptr %p) {
+; CHECK-LABEL: test_tan:
+; CHECK: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: vldr.16 s0, [r0]
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-NEXT: vstr.16 s0, [r4]
+; CHECK-NEXT: pop {r4, pc}
+ %a = load half, ptr %p, align 2
+ %r = call half @llvm.tan.f16(half %a)
+ store half %r, ptr %p
+ ret void
+}
+
define void @test_pow(ptr %p, ptr %q) {
; CHECK-LABEL: test_pow:
; CHECK: .save {r4, lr}
@@ -588,6 +605,7 @@ declare half @llvm.sqrt.f16(half %a)
declare half @llvm.powi.f16.i32(half %a, i32 %b)
declare half @llvm.sin.f16(half %a)
declare half @llvm.cos.f16(half %a)
+declare half @llvm.tan.f16(half %a)
declare half @llvm.pow.f16(half %a, half %b)
declare half @llvm.exp.f16(half %a)
declare half @llvm.exp2.f16(half %a)
diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll
index 9c01129ff30d8..ae3b8f9920e3b 100644
--- a/llvm/test/CodeGen/ARM/fp16-promote.ll
+++ b/llvm/test/CodeGen/ARM/fp16-promote.ll
@@ -393,6 +393,7 @@ declare half @llvm.sqrt.f16(half %a) #0
declare half @llvm.powi.f16.i32(half %a, i32 %b) #0
declare half @llvm.sin.f16(half %a) #0
declare half @llvm.cos.f16(half %a) #0
+declare half @llvm.tan.f16(half %a) #0
declare half @llvm.pow.f16(half %a, half %b) #0
declare half @llvm.exp.f16(half %a) #0
declare half @llvm.exp2.f16(half %a) #0
@@ -472,6 +473,21 @@ define void @test_cos(ptr %p) #0 {
ret void
}
+; CHECK-FP16-LABEL: test_tan:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl tanf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_tan:
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl tanf
+; CHECK-LIBCALL: bl __aeabi_f2h
+define void @test_tan(ptr %p) #0 {
+ %a = load half, ptr %p, align 2
+ %r = call half @llvm.tan.f16(half %a)
+ store half %r, ptr %p
+ ret void
+}
+
; CHECK-FP16-LABEL: test_pow:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
diff --git a/llvm/test/CodeGen/ARM/vfloatintrinsics.ll b/llvm/test/CodeGen/ARM/vfloatintrinsics.ll
index 028bb76c3d435..74782d44c7423 100644
--- a/llvm/test/CodeGen/ARM/vfloatintrinsics.ll
+++ b/llvm/test/CodeGen/ARM/vfloatintrinsics.ll
@@ -29,6 +29,12 @@ define %v2f32 @test_v2f32.cos(%v2f32 %a) {
%1 = call %v2f32 @llvm.cos.v2f32(%v2f32 %a)
ret %v2f32 %1
}
+; CHECK-LABEL: test_v2f32.tan:{{.*}}
+define %v2f32 @test_v2f32.tan(%v2f32 %a) {
+ ; CHECK: tan
+ %1 = call %v2f32 @llvm.tan.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
; CHECK-LABEL: test_v2f32.pow:{{.*}}
define %v2f32 @test_v2f32.pow(%v2f32 %a, %v2f32 %b) {
; CHECK: pow
@@ -112,6 +118,7 @@ declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0
declare %v2f32 @llvm.powi.v2f32.i32(%v2f32, i32) #0
declare %v2f32 @llvm.sin.v2f32(%v2f32) #0
declare %v2f32 @llvm.cos.v2f32(%v2f32) #0
+declare %v2f32 @llvm.tan.v2f32(%v2f32) #0
declare %v2f32 @llvm.pow.v2f32(%v2f32, %v2f32) #0
declare %v2f32 @llvm.exp.v2f32(%v2f32) #0
declare %v2f32 @llvm.exp2.v2f32(%v2f32) #0
@@ -153,6 +160,12 @@ define %v4f32 @test_v4f32.cos(%v4f32 %a) {
%1 = call %v4f32 @llvm.cos.v4f32(%v4f32 %a)
ret %v4f32 %1
}
+; CHECK-LABEL: test_v4f32.tan:{{.*}}
+define %v4f32 @test_v4f32.tan(%v4f32 %a) {
+ ; CHECK: tan
+ %1 = call %v4f32 @llvm.tan.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
; CHECK-LABEL: test_v4f32.pow:{{.*}}
define %v4f32 @test_v4f32.pow(%v4f32 %a, %v4f32 %b) {
; CHECK: pow
@@ -236,6 +249,7 @@ declare %v4f32 @llvm.sqrt.v4f32(%v4f32) #0
declare %v4f32 @llvm.powi.v4f32.i32(%v4f32, i32) #0
declare %v4f32 @llvm.sin.v4f32(%v4f32) #0
declare %v4f32 @llvm.cos.v4f32(%v4f32) #0
+declare %v4f32 @llvm.tan.v4f32(%v4f32) #0
declare %v4f32 @llvm.pow.v4f32(%v4f32, %v4f32) #0
declare %v4f32 @llvm.exp.v4f32(%v4f32) #0
declare %v4f32 @llvm.exp2.v4f32(%v4f32) #0
@@ -277,6 +291,12 @@ define %v2f64 @test_v2f64.cos(%v2f64 %a) {
%1 = call %v2f64 @llvm.cos.v2f64(%v2f64 %a)
ret %v2f64 %1
}
+; CHECK-LABEL: test_v2f64.tan:{{.*}}
+define %v2f64 @test_v2f64.tan(%v2f64 %a) {
+ ; CHECK: tan
+ %1 = call %v2f64 @llvm.tan.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
; CHECK-LABEL: test_v2f64.pow:{{.*}}
define %v2f64 @test_v2f64.pow(%v2f64 %a, %v2f64 %b) {
; CHECK: pow
@@ -361,6 +381,7 @@ declare %v2f64 @llvm.sqrt.v2f64(%v2f64) #0
declare %v2f64 @llvm.powi.v2f64.i32(%v2f64, i32) #0
declare %v2f64 @llvm.sin.v2f64(%v2f64) #0
declare %v2f64 @llvm.cos.v2f64(%v2f64) #0
+declare %v2f64 @llvm.tan.v2f64(%v2f64) #0
declare %v2f64 @llvm.pow.v2f64(%v2f64, %v2f64) #0
declare %v2f64 @llvm.exp.v2f64(%v2f64) #0
declare %v2f64 @llvm.exp2.v2f64(%v2f64) #0
diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll
index 70a5939865b7b..7f5da36886939 100644
--- a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll
+++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll
@@ -41,6 +41,15 @@ define double @cos_d(double %a) {
ret double %1
}
+declare double @llvm.tan.f64(double %Val)
+define double @tan_d(double %a) {
+; CHECK-LABEL: tan_d:
+; SOFT: {{(bl|b)}} tan
+; HARD: b tan
+ %1 = call double @llvm.tan.f64(double %a)
+ ret double %1
+}
+
declare double @llvm.pow.f64(double %Val, double %power)
define double @pow_d(double %a, double %b) {
; CHECK-LABEL: pow_d:
diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
index b6b891edd0461..94ba9b218a072 100644
--- a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
+++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
@@ -42,6 +42,15 @@ define float @cos_f(float %a) {
ret float %1
}
+declare float @llvm.tan.f32(float %Val)
+define float @tan_f(float %a) {
+; CHECK-LABEL: tan_f:
+; SOFT: bl tanf
+; HARD: b tanf
+ %1 = call float @llvm.tan.f32(float %a)
+ ret float %1
+}
+
declare float @llvm.pow.f32(float %Val, float %power)
define float @pow_f(float %a, float %b) {
; CHECK-LABEL: pow_f:
diff --git a/llvm/test/CodeGen/Thumb2/mve-fmath.ll b/llvm/test/CodeGen/Thumb2/mve-fmath.ll
index c299b62a4c942..d747da76a45fa 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fmath.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fmath.ll
@@ -288,6 +288,117 @@ entry:
ret <2 x double> %0
}
+define arm_aapcs_vfpcc <4 x float> @tan_float32_t(<4 x float> %src) {
+; CHECK-LABEL: tan_float32_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r7, lr}
+; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r4, d9
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov r4, r1, d8
+; CHECK-NEXT: vmov s19, r0
+; CHECK-NEXT: vmov s18, r5
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s17, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s16, r0
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %0 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %src)
+ ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @tan_float16_t(<8 x half> %src) {
+; CHECK-LABEL: tan_float16_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vcvtb.f32.f16 s0, s16
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vcvtt.f32.f16 s0, s16
+; CHECK-NEXT: vmov s16, r0
+; CHECK-NEXT: vmov r1, s0
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvtb.f16.f32 s20, s16
+; CHECK-NEXT: vcvtt.f16.f32 s20, s0
+; CHECK-NEXT: vcvtb.f32.f16 s0, s17
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvtb.f16.f32 s21, s0
+; CHECK-NEXT: vcvtt.f32.f16 s0, s17
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvtt.f16.f32 s21, s0
+; CHECK-NEXT: vcvtb.f32.f16 s0, s18
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvtb.f16.f32 s22, s0
+; CHECK-NEXT: vcvtt.f32.f16 s0, s18
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvtt.f16.f32 s22, s0
+; CHECK-NEXT: vcvtb.f32.f16 s0, s19
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvtb.f16.f32 s23, s0
+; CHECK-NEXT: vcvtt.f32.f16 s0, s19
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvtt.f16.f32 s23, s0
+; CHECK-NEXT: vmov q0, q5
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <8 x half> @llvm.tan.v8f16(<8 x half> %src)
+ ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <2 x double> @tan_float64_t(<2 x double> %src) {
+; CHECK-LABEL: tan_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl tan
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl tan
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) {
; CHECK-LABEL: exp_float32_t:
; CHECK: @ %bb.0: @ %entry
Looks reasonable to me, although I'm not super familiar with backend stuff, so perhaps an ARM person can review.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should just be setting the default operation for FTAN for all types to be Expand, so that all the backends (including all the downstream ones) will continue to work with the new nodes.
Having said that this looks very sensible. LGTM
ARMISelLowering.cpp
- Add f16 type and NEON and MVE vector support for tan