Skip to content

Commit e2f463b

Browse files
authored
[aarch64] Add hyperbolic and arc trig intrinsic lowering (#98937)
## The change(s) - `VecFuncs.def`: define intrinsic to sleef/armpl mapping - `LegalizerHelper.cpp`: add missing `fewerElementsVector` handling for the new trig intrinsics - `AArch64ISelLowering.cpp`: Add arch64 specializations for lowering like neon instructions - `AArch64LegalizerInfo.cpp`: Legalize the new trig intrinsics. aarch64 has specail legalization requirments in `AArch64LegalizerInfo.cpp`. If we redirect the clang builtin without handling this we will break the aarch64 compiler ## History This change is part of an implementation of #87367 investigation on supporting IEEE math operations as intrinsics. Which was discussed in this RFC: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294 This change adds wasm lowering cases for `acos`, `asin`, `atan`, `cosh`, `sinh`, and `tanh`. #70079 #70080 #70081 #70083 #70084 #95966 ## Why is aarch64 needed The last step is to redirect the `acos`, `asin`, `atan`, `cosh`, `sinh`, and `tanh` to emit the intrinsic. We can't emit the intrinsic without the intrinsics becoming legal for aarch64 in `AArch64LegalizerInfo.cpp`
1 parent c719d7b commit e2f463b

22 files changed

+3491
-22
lines changed

llvm/include/llvm/Analysis/VecFuncs.def

+66
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,18 @@ TLI_DEFINE_VECFUNC("llvm.exp.f32", "_simd_exp_f4", FIXED(4), "_ZGV_LLVM_N4v")
7373

7474
// Trigonometric Functions
7575
TLI_DEFINE_VECFUNC("acos", "_simd_acos_d2", FIXED(2), "_ZGV_LLVM_N2v")
76+
TLI_DEFINE_VECFUNC("llvm.acos.f64", "_simd_acos_d2", FIXED(2), "_ZGV_LLVM_N2v")
7677
TLI_DEFINE_VECFUNC("acosf", "_simd_acos_f4", FIXED(4), "_ZGV_LLVM_N4v")
78+
TLI_DEFINE_VECFUNC("llvm.acos.f32", "_simd_acos_f4", FIXED(4), "_ZGV_LLVM_N4v")
7779
TLI_DEFINE_VECFUNC("asin", "_simd_asin_d2", FIXED(2), "_ZGV_LLVM_N2v")
80+
TLI_DEFINE_VECFUNC("llvm.asin.f64", "_simd_asin_d2", FIXED(2), "_ZGV_LLVM_N2v")
7881
TLI_DEFINE_VECFUNC("asinf", "_simd_asin_f4", FIXED(4), "_ZGV_LLVM_N4v")
82+
TLI_DEFINE_VECFUNC("llvm.asin.f32", "_simd_asin_f4", FIXED(4), "_ZGV_LLVM_N4v")
7983

8084
TLI_DEFINE_VECFUNC("atan", "_simd_atan_d2", FIXED(2), "_ZGV_LLVM_N2v")
85+
TLI_DEFINE_VECFUNC("llvm.atan.f64", "_simd_atan_d2", FIXED(2), "_ZGV_LLVM_N2v")
8186
TLI_DEFINE_VECFUNC("atanf", "_simd_atan_f4", FIXED(4), "_ZGV_LLVM_N4v")
87+
TLI_DEFINE_VECFUNC("llvm.atan.f32", "_simd_atan_f4", FIXED(4), "_ZGV_LLVM_N4v")
8288
TLI_DEFINE_VECFUNC("atan2", "_simd_atan2_d2", FIXED(2), "_ZGV_LLVM_N2vv")
8389
TLI_DEFINE_VECFUNC("atan2f", "_simd_atan2_f4", FIXED(4), "_ZGV_LLVM_N4vv")
8490

@@ -109,11 +115,17 @@ TLI_DEFINE_VECFUNC("llvm.pow.f32", "_simd_pow_f4", FIXED(4), "_ZGV_LLVM_N4vv")
109115

110116
// Hyperbolic Functions
111117
TLI_DEFINE_VECFUNC("sinh", "_simd_sinh_d2", FIXED(2), "_ZGV_LLVM_N2v")
118+
TLI_DEFINE_VECFUNC("llvm.sinh.f64", "_simd_sinh_d2", FIXED(2), "_ZGV_LLVM_N2v")
112119
TLI_DEFINE_VECFUNC("sinhf", "_simd_sinh_f4", FIXED(4), "_ZGV_LLVM_N4v")
120+
TLI_DEFINE_VECFUNC("llvm.sinh.f32", "_simd_sinh_f4", FIXED(4), "_ZGV_LLVM_N4v")
113121
TLI_DEFINE_VECFUNC("cosh", "_simd_cosh_d2", FIXED(2), "_ZGV_LLVM_N2v")
122+
TLI_DEFINE_VECFUNC("llvm.cosh.f64", "_simd_cosh_d2", FIXED(2), "_ZGV_LLVM_N2v")
114123
TLI_DEFINE_VECFUNC("coshf", "_simd_cosh_f4", FIXED(4), "_ZGV_LLVM_N4v")
124+
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "_simd_cosh_f4", FIXED(4), "_ZGV_LLVM_N4v")
115125
TLI_DEFINE_VECFUNC("tanh", "_simd_tanh_d2", FIXED(2), "_ZGV_LLVM_N2v")
126+
TLI_DEFINE_VECFUNC("llvm.tanh.f64", "_simd_tanh_d2", FIXED(2), "_ZGV_LLVM_N2v")
116127
TLI_DEFINE_VECFUNC("tanhf", "_simd_tanh_f4", FIXED(4), "_ZGV_LLVM_N4v")
128+
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_simd_tanh_f4", FIXED(4), "_ZGV_LLVM_N4v")
117129
TLI_DEFINE_VECFUNC("asinh", "_simd_asinh_d2", FIXED(2), "_ZGV_LLVM_N2v")
118130
TLI_DEFINE_VECFUNC("asinhf", "_simd_asinh_f4", FIXED(4), "_ZGV_LLVM_N4v")
119131
TLI_DEFINE_VECFUNC("acosh", "_simd_acosh_d2", FIXED(2), "_ZGV_LLVM_N2v")
@@ -500,14 +512,17 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", FIXED(16), "_ZGV_LLVM_N16
500512
#elif defined(TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS)
501513

502514
TLI_DEFINE_VECFUNC("acos", "_ZGVnN2v_acos", FIXED(2), "_ZGV_LLVM_N2v")
515+
TLI_DEFINE_VECFUNC("llvm.acos.f64", "_ZGVnN2v_acos", FIXED(2), "_ZGV_LLVM_N2v")
503516

504517
TLI_DEFINE_VECFUNC("acosh", "_ZGVnN2v_acosh", FIXED(2), "_ZGV_LLVM_N2v")
505518

506519
TLI_DEFINE_VECFUNC("asin", "_ZGVnN2v_asin", FIXED(2), "_ZGV_LLVM_N2v")
520+
TLI_DEFINE_VECFUNC("llvm.asin.f64", "_ZGVnN2v_asin", FIXED(2), "_ZGV_LLVM_N2v")
507521

508522
TLI_DEFINE_VECFUNC("asinh", "_ZGVnN2v_asinh", FIXED(2), "_ZGV_LLVM_N2v")
509523

510524
TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")
525+
TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")
511526

512527
TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")
513528

@@ -521,6 +536,7 @@ TLI_DEFINE_VECFUNC("cos", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v")
521536
TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v")
522537

523538
TLI_DEFINE_VECFUNC("cosh", "_ZGVnN2v_cosh", FIXED(2), "_ZGV_LLVM_N2v")
539+
TLI_DEFINE_VECFUNC("llvm.cosh.f64", "_ZGVnN2v_cosh", FIXED(2), "_ZGV_LLVM_N2v")
524540

525541
TLI_DEFINE_VECFUNC("cospi", "_ZGVnN2v_cospi", FIXED(2), "_ZGV_LLVM_N2v")
526542

@@ -583,6 +599,7 @@ TLI_DEFINE_VECFUNC("sincos", "_ZGVnN2vl8l8_sincos", FIXED(2), "_ZGV_LLVM_N2vl8l8
583599
TLI_DEFINE_VECFUNC("sincospi", "_ZGVnN2vl8l8_sincospi", FIXED(2), "_ZGV_LLVM_N2vl8l8")
584600

585601
TLI_DEFINE_VECFUNC("sinh", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v")
602+
TLI_DEFINE_VECFUNC("llvm.sinh.f64", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v")
586603

587604
TLI_DEFINE_VECFUNC("sinpi", "_ZGVnN2v_sinpi", FIXED(2), "_ZGV_LLVM_N2v")
588605

@@ -592,20 +609,24 @@ TLI_DEFINE_VECFUNC("tan", "_ZGVnN2v_tan", FIXED(2), "_ZGV_LLVM_N2v")
592609
TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVnN2v_tan", FIXED(2), "_ZGV_LLVM_N2v")
593610

594611
TLI_DEFINE_VECFUNC("tanh", "_ZGVnN2v_tanh", FIXED(2), "_ZGV_LLVM_N2v")
612+
TLI_DEFINE_VECFUNC("llvm.tanh.f64", "_ZGVnN2v_tanh", FIXED(2), "_ZGV_LLVM_N2v")
595613

596614
TLI_DEFINE_VECFUNC("tgamma", "_ZGVnN2v_tgamma", FIXED(2), "_ZGV_LLVM_N2v")
597615

598616
#elif defined(TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS)
599617

600618
TLI_DEFINE_VECFUNC("acosf", "_ZGVnN4v_acosf", FIXED(4), "_ZGV_LLVM_N4v")
619+
TLI_DEFINE_VECFUNC("llvm.acos.f32", "_ZGVnN4v_acosf", FIXED(4), "_ZGV_LLVM_N4v")
601620

602621
TLI_DEFINE_VECFUNC("acoshf", "_ZGVnN4v_acoshf", FIXED(4), "_ZGV_LLVM_N4v")
603622

604623
TLI_DEFINE_VECFUNC("asinf", "_ZGVnN4v_asinf", FIXED(4), "_ZGV_LLVM_N4v")
624+
TLI_DEFINE_VECFUNC("llvm.asin.f32", "_ZGVnN4v_asinf", FIXED(4), "_ZGV_LLVM_N4v")
605625

606626
TLI_DEFINE_VECFUNC("asinhf", "_ZGVnN4v_asinhf", FIXED(4), "_ZGV_LLVM_N4v")
607627

608628
TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")
629+
TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")
609630

610631
TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")
611632

@@ -619,6 +640,7 @@ TLI_DEFINE_VECFUNC("cosf", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v")
619640
TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v")
620641

621642
TLI_DEFINE_VECFUNC("coshf", "_ZGVnN4v_coshf", FIXED(4), "_ZGV_LLVM_N4v")
643+
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "_ZGVnN4v_coshf", FIXED(4), "_ZGV_LLVM_N4v")
622644

623645
TLI_DEFINE_VECFUNC("cospif", "_ZGVnN4v_cospif", FIXED(4), "_ZGV_LLVM_N4v")
624646

@@ -681,6 +703,7 @@ TLI_DEFINE_VECFUNC("sincosf", "_ZGVnN4vl4l4_sincosf", FIXED(4), "_ZGV_LLVM_N4vl4
681703
TLI_DEFINE_VECFUNC("sincospif", "_ZGVnN4vl4l4_sincospif", FIXED(4), "_ZGV_LLVM_N4vl4l4")
682704

683705
TLI_DEFINE_VECFUNC("sinhf", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v")
706+
TLI_DEFINE_VECFUNC("llvm.sinh.f32", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v")
684707

685708
TLI_DEFINE_VECFUNC("sinpif", "_ZGVnN4v_sinpif", FIXED(4), "_ZGV_LLVM_N4v")
686709

@@ -690,25 +713,32 @@ TLI_DEFINE_VECFUNC("tanf", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
690713
TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
691714

692715
TLI_DEFINE_VECFUNC("tanhf", "_ZGVnN4v_tanhf", FIXED(4), "_ZGV_LLVM_N4v")
716+
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_ZGVnN4v_tanhf", FIXED(4), "_ZGV_LLVM_N4v")
693717

694718
TLI_DEFINE_VECFUNC("tgammaf", "_ZGVnN4v_tgammaf", FIXED(4), "_ZGV_LLVM_N4v")
695719

696720
#elif defined(TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS)
697721

698722
TLI_DEFINE_VECFUNC("acos", "_ZGVsMxv_acos", SCALABLE(2), MASKED, "_ZGVsMxv")
699723
TLI_DEFINE_VECFUNC("acosf", "_ZGVsMxv_acosf", SCALABLE(4), MASKED, "_ZGVsMxv")
724+
TLI_DEFINE_VECFUNC("llvm.acos.f64", "_ZGVsMxv_acos", SCALABLE(2), MASKED, "_ZGVsMxv")
725+
TLI_DEFINE_VECFUNC("llvm.acos.f32", "_ZGVsMxv_acosf", SCALABLE(4), MASKED, "_ZGVsMxv")
700726

701727
TLI_DEFINE_VECFUNC("acosh", "_ZGVsMxv_acosh", SCALABLE(2), MASKED, "_ZGVsMxv")
702728
TLI_DEFINE_VECFUNC("acoshf", "_ZGVsMxv_acoshf", SCALABLE(4), MASKED, "_ZGVsMxv")
703729

704730
TLI_DEFINE_VECFUNC("asin", "_ZGVsMxv_asin", SCALABLE(2), MASKED, "_ZGVsMxv")
705731
TLI_DEFINE_VECFUNC("asinf", "_ZGVsMxv_asinf", SCALABLE(4), MASKED, "_ZGVsMxv")
732+
TLI_DEFINE_VECFUNC("llvm.asin.f64", "_ZGVsMxv_asin", SCALABLE(2), MASKED, "_ZGVsMxv")
733+
TLI_DEFINE_VECFUNC("llvm.asin.f32", "_ZGVsMxv_asinf", SCALABLE(4), MASKED, "_ZGVsMxv")
706734

707735
TLI_DEFINE_VECFUNC("asinh", "_ZGVsMxv_asinh", SCALABLE(2), MASKED, "_ZGVsMxv")
708736
TLI_DEFINE_VECFUNC("asinhf", "_ZGVsMxv_asinhf", SCALABLE(4), MASKED, "_ZGVsMxv")
709737

710738
TLI_DEFINE_VECFUNC("atan", "_ZGVsMxv_atan", SCALABLE(2), MASKED, "_ZGVsMxv")
711739
TLI_DEFINE_VECFUNC("atanf", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGVsMxv")
740+
TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVsMxv_atan", SCALABLE(2), MASKED, "_ZGVsMxv")
741+
TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGVsMxv")
712742

713743
TLI_DEFINE_VECFUNC("atan2", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv")
714744
TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv")
@@ -729,6 +759,8 @@ TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsM
729759

730760
TLI_DEFINE_VECFUNC("cosh", "_ZGVsMxv_cosh", SCALABLE(2), MASKED, "_ZGVsMxv")
731761
TLI_DEFINE_VECFUNC("coshf", "_ZGVsMxv_coshf", SCALABLE(4), MASKED, "_ZGVsMxv")
762+
TLI_DEFINE_VECFUNC("llvm.cosh.f64", "_ZGVsMxv_cosh", SCALABLE(2), MASKED, "_ZGVsMxv")
763+
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "_ZGVsMxv_coshf", SCALABLE(4), MASKED, "_ZGVsMxv")
732764

733765
TLI_DEFINE_VECFUNC("cospi", "_ZGVsMxv_cospi", SCALABLE(2), MASKED, "_ZGVsMxv")
734766
TLI_DEFINE_VECFUNC("cospif", "_ZGVsMxv_cospif", SCALABLE(4), MASKED, "_ZGVsMxv")
@@ -826,6 +858,8 @@ TLI_DEFINE_VECFUNC("sincospif", "_ZGVsNxvl4l4_sincospif", SCALABLE(4), NOMASK, "
826858

827859
TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv")
828860
TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv")
861+
TLI_DEFINE_VECFUNC("llvm.sinh.f64", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv")
862+
TLI_DEFINE_VECFUNC("llvm.sinh.f32", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv")
829863

830864
TLI_DEFINE_VECFUNC("sinpi", "_ZGVsMxv_sinpi", SCALABLE(2), MASKED, "_ZGVsMxv")
831865
TLI_DEFINE_VECFUNC("sinpif", "_ZGVsMxv_sinpif", SCALABLE(4), MASKED, "_ZGVsMxv")
@@ -840,6 +874,8 @@ TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVsMxv_tanf", SCALABLE(4), MASKED, "_ZGVsM
840874

841875
TLI_DEFINE_VECFUNC("tanh", "_ZGVsMxv_tanh", SCALABLE(2), MASKED, "_ZGVsMxv")
842876
TLI_DEFINE_VECFUNC("tanhf", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED, "_ZGVsMxv")
877+
TLI_DEFINE_VECFUNC("llvm.tanh.f64", "_ZGVsMxv_tanh", SCALABLE(2), MASKED, "_ZGVsMxv")
878+
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED, "_ZGVsMxv")
843879

844880
TLI_DEFINE_VECFUNC("tgamma", "_ZGVsMxv_tgamma", SCALABLE(2), MASKED, "_ZGVsMxv")
845881
TLI_DEFINE_VECFUNC("tgammaf", "_ZGVsMxv_tgammaf", SCALABLE(4), MASKED, "_ZGVsMxv")
@@ -851,6 +887,11 @@ TLI_DEFINE_VECFUNC("acosf", "armpl_vacosq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v
851887
TLI_DEFINE_VECFUNC("acos", "armpl_svacos_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
852888
TLI_DEFINE_VECFUNC("acosf", "armpl_svacos_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
853889

890+
TLI_DEFINE_VECFUNC("llvm.acos.f64", "armpl_vacosq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
891+
TLI_DEFINE_VECFUNC("llvm.acos.f32", "armpl_vacosq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
892+
TLI_DEFINE_VECFUNC("llvm.acos.f64", "armpl_svacos_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
893+
TLI_DEFINE_VECFUNC("llvm.acos.f32", "armpl_svacos_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
894+
854895
TLI_DEFINE_VECFUNC("acosh", "armpl_vacoshq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
855896
TLI_DEFINE_VECFUNC("acoshf", "armpl_vacoshq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
856897
TLI_DEFINE_VECFUNC("acosh", "armpl_svacosh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
@@ -861,6 +902,11 @@ TLI_DEFINE_VECFUNC("asinf", "armpl_vasinq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v
861902
TLI_DEFINE_VECFUNC("asin", "armpl_svasin_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
862903
TLI_DEFINE_VECFUNC("asinf", "armpl_svasin_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
863904

905+
TLI_DEFINE_VECFUNC("llvm.asin.f64", "armpl_vasinq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
906+
TLI_DEFINE_VECFUNC("llvm.asin.f32", "armpl_vasinq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
907+
TLI_DEFINE_VECFUNC("llvm.asin.f64", "armpl_svasin_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
908+
TLI_DEFINE_VECFUNC("llvm.asin.f32", "armpl_svasin_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
909+
864910
TLI_DEFINE_VECFUNC("asinh", "armpl_vasinhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
865911
TLI_DEFINE_VECFUNC("asinhf", "armpl_vasinhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
866912
TLI_DEFINE_VECFUNC("asinh", "armpl_svasinh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
@@ -871,6 +917,11 @@ TLI_DEFINE_VECFUNC("atanf", "armpl_vatanq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v
871917
TLI_DEFINE_VECFUNC("atan", "armpl_svatan_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
872918
TLI_DEFINE_VECFUNC("atanf", "armpl_svatan_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
873919

920+
TLI_DEFINE_VECFUNC("llvm.atan.f64", "armpl_vatanq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
921+
TLI_DEFINE_VECFUNC("llvm.atan.f32", "armpl_vatanq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
922+
TLI_DEFINE_VECFUNC("llvm.atan.f64", "armpl_svatan_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
923+
TLI_DEFINE_VECFUNC("llvm.atan.f32", "armpl_svatan_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
924+
874925
TLI_DEFINE_VECFUNC("atan2", "armpl_vatan2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv")
875926
TLI_DEFINE_VECFUNC("atan2f", "armpl_vatan2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv")
876927
TLI_DEFINE_VECFUNC("atan2", "armpl_svatan2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv")
@@ -906,6 +957,11 @@ TLI_DEFINE_VECFUNC("coshf", "armpl_vcoshq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v
906957
TLI_DEFINE_VECFUNC("cosh", "armpl_svcosh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
907958
TLI_DEFINE_VECFUNC("coshf", "armpl_svcosh_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
908959

960+
TLI_DEFINE_VECFUNC("llvm.cosh.f64", "armpl_vcoshq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
961+
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "armpl_vcoshq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
962+
TLI_DEFINE_VECFUNC("llvm.cosh.f64", "armpl_svcosh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
963+
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "armpl_svcosh_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
964+
909965
TLI_DEFINE_VECFUNC("cospi", "armpl_vcospiq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
910966
TLI_DEFINE_VECFUNC("cospif", "armpl_vcospiq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
911967
TLI_DEFINE_VECFUNC("cospi", "armpl_svcospi_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
@@ -1081,6 +1137,11 @@ TLI_DEFINE_VECFUNC("sinhf", "armpl_vsinhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v
10811137
TLI_DEFINE_VECFUNC("sinh", "armpl_svsinh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
10821138
TLI_DEFINE_VECFUNC("sinhf", "armpl_svsinh_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
10831139

1140+
TLI_DEFINE_VECFUNC("llvm.sinh.f64", "armpl_vsinhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
1141+
TLI_DEFINE_VECFUNC("llvm.sinh.f32", "armpl_vsinhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1142+
TLI_DEFINE_VECFUNC("llvm.sinh.f64", "armpl_svsinh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
1143+
TLI_DEFINE_VECFUNC("llvm.sinh.f32", "armpl_svsinh_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
1144+
10841145
TLI_DEFINE_VECFUNC("sinpi", "armpl_vsinpiq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
10851146
TLI_DEFINE_VECFUNC("sinpif", "armpl_vsinpiq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
10861147
TLI_DEFINE_VECFUNC("sinpi", "armpl_svsinpi_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
@@ -1106,6 +1167,11 @@ TLI_DEFINE_VECFUNC("tanhf", "armpl_vtanhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v
11061167
TLI_DEFINE_VECFUNC("tanh", "armpl_svtanh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
11071168
TLI_DEFINE_VECFUNC("tanhf", "armpl_svtanh_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
11081169

1170+
TLI_DEFINE_VECFUNC("llvm.tanh.f64", "armpl_vtanhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
1171+
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "armpl_vtanhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1172+
TLI_DEFINE_VECFUNC("llvm.tanh.f64", "armpl_svtanh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
1173+
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "armpl_svtanh_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
1174+
11091175
TLI_DEFINE_VECFUNC("tgamma", "armpl_vtgammaq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
11101176
TLI_DEFINE_VECFUNC("tgammaf", "armpl_vtgammaq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
11111177
TLI_DEFINE_VECFUNC("tgamma", "armpl_svtgamma_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -4747,6 +4747,12 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
47474747
case G_FCOS:
47484748
case G_FSIN:
47494749
case G_FTAN:
4750+
case G_FACOS:
4751+
case G_FASIN:
4752+
case G_FATAN:
4753+
case G_FCOSH:
4754+
case G_FSINH:
4755+
case G_FTANH:
47504756
case G_FSQRT:
47514757
case G_BSWAP:
47524758
case G_BITREVERSE:

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+18
Original file line numberDiff line numberDiff line change
@@ -732,10 +732,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
732732

733733
for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
734734
ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
735+
ISD::FACOS, ISD::FASIN, ISD::FATAN,
736+
ISD::FCOSH, ISD::FSINH, ISD::FTANH,
735737
ISD::FTAN, ISD::FEXP, ISD::FEXP2,
736738
ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
737739
ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW,
738740
ISD::STRICT_FPOWI, ISD::STRICT_FCOS, ISD::STRICT_FSIN,
741+
ISD::STRICT_FACOS, ISD::STRICT_FASIN, ISD::STRICT_FATAN,
742+
ISD::STRICT_FCOSH, ISD::STRICT_FSINH, ISD::STRICT_FTANH,
739743
ISD::STRICT_FEXP, ISD::STRICT_FEXP2, ISD::STRICT_FLOG,
740744
ISD::STRICT_FLOG2, ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
741745
setOperationAction(Op, MVT::f16, Promote);
@@ -1176,6 +1180,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
11761180
ISD::FNEG, ISD::FABS, ISD::FCEIL,
11771181
ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
11781182
ISD::FSIN, ISD::FCOS, ISD::FTAN,
1183+
ISD::FASIN, ISD::FACOS, ISD::FATAN,
1184+
ISD::FSINH, ISD::FCOSH, ISD::FTANH,
11791185
ISD::FPOW, ISD::FLOG, ISD::FLOG2,
11801186
ISD::FLOG10, ISD::FEXP, ISD::FEXP2,
11811187
ISD::FEXP10, ISD::FRINT, ISD::FROUND,
@@ -1615,6 +1621,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
16151621
setOperationAction(ISD::FSIN, VT, Expand);
16161622
setOperationAction(ISD::FSINCOS, VT, Expand);
16171623
setOperationAction(ISD::FTAN, VT, Expand);
1624+
setOperationAction(ISD::FACOS, VT, Expand);
1625+
setOperationAction(ISD::FASIN, VT, Expand);
1626+
setOperationAction(ISD::FATAN, VT, Expand);
1627+
setOperationAction(ISD::FCOSH, VT, Expand);
1628+
setOperationAction(ISD::FSINH, VT, Expand);
1629+
setOperationAction(ISD::FTANH, VT, Expand);
16181630
setOperationAction(ISD::FEXP, VT, Expand);
16191631
setOperationAction(ISD::FEXP2, VT, Expand);
16201632
setOperationAction(ISD::FEXP10, VT, Expand);
@@ -1822,6 +1834,12 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
18221834
setOperationAction(ISD::FSIN, VT, Expand);
18231835
setOperationAction(ISD::FCOS, VT, Expand);
18241836
setOperationAction(ISD::FTAN, VT, Expand);
1837+
setOperationAction(ISD::FASIN, VT, Expand);
1838+
setOperationAction(ISD::FACOS, VT, Expand);
1839+
setOperationAction(ISD::FATAN, VT, Expand);
1840+
setOperationAction(ISD::FSINH, VT, Expand);
1841+
setOperationAction(ISD::FCOSH, VT, Expand);
1842+
setOperationAction(ISD::FTANH, VT, Expand);
18251843
setOperationAction(ISD::FPOW, VT, Expand);
18261844
setOperationAction(ISD::FLOG, VT, Expand);
18271845
setOperationAction(ISD::FLOG2, VT, Expand);

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -267,8 +267,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
267267
.libcallFor({{s64, s128}})
268268
.minScalarOrElt(1, MinFPScalar);
269269

270-
getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
271-
G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10})
270+
getActionDefinitionsBuilder(
271+
{G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, G_FTAN, G_FEXP,
272+
G_FEXP2, G_FEXP10, G_FACOS, G_FASIN, G_FATAN, G_FCOSH, G_FSINH, G_FTANH})
272273
// We need a call for these, so we always need to scalarize.
273274
.scalarize(0)
274275
// Regardless of FP16 support, widen 16-bit elements to 32-bits.

0 commit comments

Comments
 (0)