Skip to content

Commit 918313d

Browse files
authored
[SLPVectorizer] Support SLPVectorizer cases of tan across all backends (#95517)
This PR is intended to address the limited SLPVectorizer support of tan raised in the comments of PR #94559. Right now, emitting the tan intrinsic allows you to vectorize tan, but emitting the libfunc does not. To address this, the libcall needs to be mapped to the intrinsic, and the libcall and function name need to be marked appropriately so they can be optimized or defined as a call lowering.
1 parent aa8409f commit 918313d

File tree

6 files changed

+43
-18
lines changed

6 files changed

+43
-18
lines changed

llvm/include/llvm/Analysis/TargetLibraryInfo.h

+3
Original file line numberDiff line numberDiff line change
@@ -410,10 +410,12 @@ class TargetLibraryInfo {
410410
return false;
411411
switch (F) {
412412
default: break;
413+
// clang-format off
413414
case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl:
414415
case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl:
415416
case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl:
416417
case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl:
418+
case LibFunc_tan: case LibFunc_tanf: case LibFunc_tanl:
417419
case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl:
418420
case LibFunc_sqrt_finite: case LibFunc_sqrtf_finite:
419421
case LibFunc_sqrtl_finite:
@@ -432,6 +434,7 @@ class TargetLibraryInfo {
432434
case LibFunc_memcmp: case LibFunc_bcmp: case LibFunc_strcmp:
433435
case LibFunc_strcpy: case LibFunc_stpcpy: case LibFunc_strlen:
434436
case LibFunc_strnlen: case LibFunc_memchr: case LibFunc_mempcpy:
437+
// clang-format on
435438
return true;
436439
}
437440
return false;

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

+7-4
Original file line numberDiff line numberDiff line change
@@ -156,14 +156,17 @@ class TargetTransformInfoImplBase {
156156
StringRef Name = F->getName();
157157

158158
// These will all likely lower to a single selection DAG node.
159+
// clang-format off
159160
if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
160-
Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
161+
Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
161162
Name == "fmin" || Name == "fminf" || Name == "fminl" ||
162163
Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
163-
Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
164-
Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
164+
Name == "sin" || Name == "sinf" || Name == "sinl" ||
165+
Name == "cos" || Name == "cosf" || Name == "cosl" ||
166+
Name == "tan" || Name == "tanf" || Name == "tanl" ||
167+
Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
165168
return false;
166-
169+
// clang-format on
167170
// These are all likely to be optimized into something smaller.
168171
if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
169172
Name == "exp2l" || Name == "exp2f" || Name == "floor" ||

llvm/lib/Analysis/ValueTracking.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -3994,6 +3994,10 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
39943994
case LibFunc_cosf:
39953995
case LibFunc_cosl:
39963996
return Intrinsic::cos;
3997+
case LibFunc_tan:
3998+
case LibFunc_tanf:
3999+
case LibFunc_tanl:
4000+
return Intrinsic::tan;
39974001
case LibFunc_exp:
39984002
case LibFunc_expf:
39994003
case LibFunc_expl:

llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll

+5-7
Original file line numberDiff line numberDiff line change
@@ -548,13 +548,11 @@ define <4 x float> @tan_4x(ptr %a) {
548548
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
549549
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
550550
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
551-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
552-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
553-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
554-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
555-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
556-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
557-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
551+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
552+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP3]])
553+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
554+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
555+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
558556
;
559557
entry:
560558
%0 = load <4 x float>, ptr %a, align 16

llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll

+5-7
Original file line numberDiff line numberDiff line change
@@ -548,13 +548,11 @@ define <4 x float> @tan_4x(ptr %a) {
548548
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
549549
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
550550
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
551-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
552-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
553-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
554-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
555-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
556-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
557-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
551+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
552+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP3]])
553+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
554+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
555+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
558556
;
559557
entry:
560558
%0 = load <4 x float>, ptr %a, align 16

llvm/test/Transforms/SLPVectorizer/X86/call.ll

+19
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0"
66

77
declare double @sin(double) nounwind willreturn
88
declare double @cos(double) nounwind willreturn
9+
declare double @tan(double) nounwind willreturn
910
declare double @pow(double, double) nounwind willreturn
1011
declare double @exp2(double) nounwind willreturn
1112
declare double @sqrt(double) nounwind willreturn
@@ -48,6 +49,24 @@ define void @cos_libm(ptr %a, ptr %b) {
4849
ret void
4950
}
5051

52+
; Regression test for libm @tan calls: the body loads two adjacent doubles
; from %a, calls @tan on each, and stores the two results to adjacent slots
; in %b. The CHECK lines expect the output to contain a single <2 x double>
; load, one call to @llvm.tan.v2f64, and a single <2 x double> store.
define void @tan_libm(ptr %a, ptr %b) {
53+
; CHECK-LABEL: @tan_libm(
54+
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
55+
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.tan.v2f64(<2 x double> [[TMP2]])
56+
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
57+
; CHECK-NEXT: ret void
58+
;
59+
%a0 = load double, ptr %a, align 8
60+
%idx1 = getelementptr inbounds double, ptr %a, i64 1
61+
%a1 = load double, ptr %idx1, align 8
62+
%tan1 = tail call double @tan(double %a0) nounwind readnone
63+
%tan2 = tail call double @tan(double %a1) nounwind readnone
64+
store double %tan1, ptr %b, align 8
65+
%idx2 = getelementptr inbounds double, ptr %b, i64 1
66+
store double %tan2, ptr %idx2, align 8
67+
ret void
68+
}
69+
5170
define void @pow_libm(ptr %a, ptr %b) {
5271
; CHECK-LABEL: @pow_libm(
5372
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8

0 commit comments

Comments
 (0)