[CodeGen] Support tan vector usage across all backends #95518
Conversation
Add a default f16 type promotion
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-webassembly

Author: Farzon Lotfi (farzonl)

Changes: Add a default f16 type promotion.

Full diff: https://github.com/llvm/llvm-project/pull/95518.diff

4 Files Affected:
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 8240a1fd7e2ff..de534994fa48c 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -961,7 +961,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(
{ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
- ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT},
+ ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN},
VT, Expand);
// Constrained floating-point operations default to expand.
@@ -1020,6 +1020,7 @@ void TargetLoweringBase::initActions() {
ISD::FTAN},
{MVT::f32, MVT::f64, MVT::f128}, Expand);
+ setOperationAction(ISD::FTAN, MVT::f16, Promote);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
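
The two changes above cover both cases in the generic defaults: ISD::FTAN joins the per-vector-type Expand list, and f16 gets a Promote rule so the operand is widened to f32 (where the tanf libcall already exists) and the result is truncated back. Conceptually the f16 promotion behaves like the IR sketch below; this is an illustration of the effect, not the literal SelectionDAG legalizer output, and the function name is made up. The RISC-V assembly in the next test file shows the same shape as fcvt.s.h / call tanf / fcvt.h.s.

declare float @llvm.tan.f32(float)

define half @tan_f16_promoted_sketch(half %a) {
  ; widen the half operand to float, evaluate tan there, narrow the result
  %wide = fpext half %a to float
  %t = call float @llvm.tan.f32(float %wide)
  %narrow = fptrunc float %t to half
  ret half %narrow
}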
diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
index c493a9b2cb1df..bfc26b0d65980 100644
--- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
@@ -2862,3 +2862,123 @@ define i1 @isnan_d_fpclass(half %x) {
%1 = call i1 @llvm.is.fpclass.f16(half %x, i32 3) ; nan
ret i1 %1
}
+
+declare half @llvm.tan.f16(half)
+
+define half @tan_f16(half %a) nounwind {
+; RV32IZFH-LABEL: tan_f16:
+; RV32IZFH: # %bb.0:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT: call tanf
+; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: ret
+;
+; RV64IZFH-LABEL: tan_f16:
+; RV64IZFH: # %bb.0:
+; RV64IZFH-NEXT: addi sp, sp, -16
+; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IZFH-NEXT: call tanf
+; RV64IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFH-NEXT: addi sp, sp, 16
+; RV64IZFH-NEXT: ret
+;
+; RV32IZHINX-LABEL: tan_f16:
+; RV32IZHINX: # %bb.0:
+; RV32IZHINX-NEXT: addi sp, sp, -16
+; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT: fcvt.s.h a0, a0
+; RV32IZHINX-NEXT: call tanf
+; RV32IZHINX-NEXT: fcvt.h.s a0, a0
+; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZHINX-NEXT: addi sp, sp, 16
+; RV32IZHINX-NEXT: ret
+;
+; RV64IZHINX-LABEL: tan_f16:
+; RV64IZHINX: # %bb.0:
+; RV64IZHINX-NEXT: addi sp, sp, -16
+; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZHINX-NEXT: fcvt.s.h a0, a0
+; RV64IZHINX-NEXT: call tanf
+; RV64IZHINX-NEXT: fcvt.h.s a0, a0
+; RV64IZHINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZHINX-NEXT: addi sp, sp, 16
+; RV64IZHINX-NEXT: ret
+;
+; RV32I-LABEL: tan_f16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: call __extendhfsf2
+; RV32I-NEXT: call tanf
+; RV32I-NEXT: call __truncsfhf2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: tan_f16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: call __extendhfsf2
+; RV64I-NEXT: call tanf
+; RV64I-NEXT: call __truncsfhf2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV32IZFHMIN-LABEL: tan_f16:
+; RV32IZFHMIN: # %bb.0:
+; RV32IZFHMIN-NEXT: addi sp, sp, -16
+; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0
+; RV32IZFHMIN-NEXT: call tanf
+; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: addi sp, sp, 16
+; RV32IZFHMIN-NEXT: ret
+;
+; RV64IZFHMIN-LABEL: tan_f16:
+; RV64IZFHMIN: # %bb.0:
+; RV64IZFHMIN-NEXT: addi sp, sp, -16
+; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0
+; RV64IZFHMIN-NEXT: call tanf
+; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0
+; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFHMIN-NEXT: addi sp, sp, 16
+; RV64IZFHMIN-NEXT: ret
+;
+; RV32IZHINXMIN-LABEL: tan_f16:
+; RV32IZHINXMIN: # %bb.0:
+; RV32IZHINXMIN-NEXT: addi sp, sp, -16
+; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0
+; RV32IZHINXMIN-NEXT: call tanf
+; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0
+; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZHINXMIN-NEXT: addi sp, sp, 16
+; RV32IZHINXMIN-NEXT: ret
+;
+; RV64IZHINXMIN-LABEL: tan_f16:
+; RV64IZHINXMIN: # %bb.0:
+; RV64IZHINXMIN-NEXT: addi sp, sp, -16
+; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0
+; RV64IZHINXMIN-NEXT: call tanf
+; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0
+; RV64IZHINXMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZHINXMIN-NEXT: addi sp, sp, 16
+; RV64IZHINXMIN-NEXT: ret
+ %1 = call half @llvm.tan.f16(half %a)
+ ret half %1
+}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
index d214a3af5a151..1d6e073271efa 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
@@ -377,6 +377,14 @@ define <4 x float> @cos_v4f32(<4 x float> %x) {
ret <4 x float> %v
}
+; CHECK-LABEL: tan_v4f32:
+; CHECK: call $push[[L:[0-9]+]]=, tanf
+declare <4 x float> @llvm.tan.v4f32(<4 x float>)
+define <4 x float> @tan_v4f32(<4 x float> %x) {
+ %v = call <4 x float> @llvm.tan.v4f32(<4 x float> %x)
+ ret <4 x float> %v
+}
+
; CHECK-LABEL: powi_v4f32:
; CHECK: call $push[[L:[0-9]+]]=, __powisf2
declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32)
@@ -469,6 +477,14 @@ define <2 x double> @cos_v2f64(<2 x double> %x) {
ret <2 x double> %v
}
+; CHECK-LABEL: tan_v2f64:
+; CHECK: call $push[[L:[0-9]+]]=, tan
+declare <2 x double> @llvm.tan.v2f64(<2 x double>)
+define <2 x double> @tan_v2f64(<2 x double> %x) {
+ %v = call <2 x double> @llvm.tan.v2f64(<2 x double> %x)
+ ret <2 x double> %v
+}
+
; CHECK-LABEL: powi_v2f64:
; CHECK: call $push[[L:[0-9]+]]=, __powidf2
declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32)
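
Because ISD::FTAN is now in the default vector Expand list, a target with no special handling (WebAssembly SIMD in this test) scalarizes the vector node: each lane goes through the scalar operation, which in turn lowers to the tanf / tan libcall the CHECK lines match. The IR below is a hand-written sketch of that per-lane expansion for the v4f32 case, purely for illustration; the real expansion happens on SelectionDAG nodes and the function name is invented.

declare float @llvm.tan.f32(float)

define <4 x float> @tan_v4f32_unrolled_sketch(<4 x float> %x) {
  ; extract each lane, apply scalar tan, and reinsert the result
  %x0 = extractelement <4 x float> %x, i32 0
  %t0 = call float @llvm.tan.f32(float %x0)
  %r0 = insertelement <4 x float> poison, float %t0, i32 0
  %x1 = extractelement <4 x float> %x, i32 1
  %t1 = call float @llvm.tan.f32(float %x1)
  %r1 = insertelement <4 x float> %r0, float %t1, i32 1
  %x2 = extractelement <4 x float> %x, i32 2
  %t2 = call float @llvm.tan.f32(float %x2)
  %r2 = insertelement <4 x float> %r1, float %t2, i32 2
  %x3 = extractelement <4 x float> %x, i32 3
  %t3 = call float @llvm.tan.f32(float %x3)
  %r3 = insertelement <4 x float> %r2, float %t3, i32 3
  ret <4 x float> %r3
}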
diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
index 0f070347dd4ef..9c910d70807a1 100644
--- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
@@ -162,6 +162,60 @@ for.end: ; preds = %for.body, %entry
declare double @llvm.cos.f64(double)
+define void @tan_f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @tan_f32(
+; CHECK: llvm.tan.v4f32
+; CHECK: ret void
+;
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+ %0 = load float, ptr %arrayidx, align 4
+ %call = tail call float @llvm.tan.f32(float %0)
+ %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+ store float %call, ptr %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.tan.f32(float)
+
+define void @tan_f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @tan_f64(
+; CHECK: llvm.tan.v4f64
+; CHECK: ret void
+;
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+ %0 = load double, ptr %arrayidx, align 8
+ %call = tail call double @llvm.tan.f64(double %0)
+ %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+ store double %call, ptr %arrayidx2, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.tan.f64(double)
+
define void @exp_f32(i32 %n, ptr %y, ptr %x) {
; CHECK-LABEL: @exp_f32(
; CHECK: llvm.exp.v4f32
The backends to test were picked at random to show that vector and f16 support exists without explicitly enabling it in the RISC-V and WebAssembly backends.
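
For the LoopVectorize test, the CHECK lines only assert that the widened intrinsic (llvm.tan.v4f32 / llvm.tan.v4f64) shows up. Assuming the test is run with a forced vector width of 4, the vector loop body the pass produces looks roughly like the sketch below; block and value names are illustrative, and the scalar epilogue and runtime checks the vectorizer also emits are omitted.

declare <4 x float> @llvm.tan.v4f32(<4 x float>)

define void @tan_f32_vector_body_sketch(ptr %y, ptr %x, i64 %n.vec) {
entry:
  br label %vector.body

vector.body:
  ; process four floats per iteration with the widened tan intrinsic
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %gep.y = getelementptr inbounds float, ptr %y, i64 %index
  %wide.load = load <4 x float>, ptr %gep.y, align 4
  %wide.tan = call <4 x float> @llvm.tan.v4f32(<4 x float> %wide.load)
  %gep.x = getelementptr inbounds float, ptr %x, i64 %index
  store <4 x float> %wide.tan, ptr %gep.x, align 4
  %index.next = add i64 %index, 4
  %done = icmp eq i64 %index.next, %n.vec
  br i1 %done, label %exit, label %vector.body

exit:
  ret void
}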
LGTM
Build failure looks to be caused by lldb timeouts, not related to this change; it is the same timeout we are seeing here: https://buildkite.com/llvm-project/github-pull-requests/builds/72742#01901673-5e12-4a2f-a9f1-c7047777f32d
So I will proceed with the merge.