-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[InstCombine] Fold tan(x) * cos(x) => sin(x) #136319
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Summary: This patch enables folding tan(x) * cos(x) -> sin(x) under -ffast-math flag
@llvm/pr-subscribers-llvm-transforms Author: None (amordo) ChangesThis patch enables folding tan(x) * cos(x) -> sin(x) under -ffast-math flag. Alive2 proof is not able to check the transformation - https://alive2.llvm.org/ce/z/sgMYRJ The tests copy logic from https://github.com/llvm/llvm-project/blob/6c5f50f18694a4d91d7ce53a14188c54ee7c6f3b/llvm/test/Transforms/InstCombine/fdiv-sin-cos.ll Fixes #34950. Full diff: https://github.com/llvm/llvm-project/pull/136319.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index c7023eb79b04e..e3ba0c4468d92 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1073,6 +1073,16 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
return Result;
}
+ // tan(X) * cos(X) -> sin(X)
+ if (I.hasAllowReassoc() && Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *X;
+ if (match(Op0, m_Intrinsic<Intrinsic::tan>(m_Value(X))) &&
+ match(Op1, m_Intrinsic<Intrinsic::cos>(m_Specific(X)))) {
+ Value *Sin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, &I);
+ return replaceInstUsesWith(I, Sin);
+ }
+ }
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/fmul-tan-cos.ll b/llvm/test/Transforms/InstCombine/fmul-tan-cos.ll
new file mode 100644
index 0000000000000..8196e8f7c81ba
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fmul-tan-cos.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+define double @fmul_tan_cos(double %a) {
+; CHECK-LABEL: define double @fmul_tan_cos(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.tan.f64(double [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.cos.f64(double [[A]])
+; CHECK-NEXT: [[RES:%.*]] = fmul double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret double [[RES]]
+;
+ %1 = call double @llvm.tan.f64(double %a)
+ %2 = call double @llvm.cos.f64(double %a)
+ %res = fmul double %1, %2
+ ret double %res
+}
+
+define double @fmul_strict_tan_strict_cos_reassoc(double %a) {
+; CHECK-LABEL: define double @fmul_strict_tan_strict_cos_reassoc(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.tan.f64(double [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call reassoc double @llvm.cos.f64(double [[A]])
+; CHECK-NEXT: [[RES:%.*]] = fmul double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret double [[RES]]
+;
+ %1 = call double @llvm.tan.f64(double %a)
+ %2 = call reassoc double @llvm.cos.f64(double %a)
+ %res = fmul double %1, %2
+ ret double %res
+}
+
+define double @fmul_reassoc_tan_strict_cos_strict(double %a, ptr dereferenceable(2) %dummy) {
+; CHECK-LABEL: define double @fmul_reassoc_tan_strict_cos_strict(
+; CHECK-SAME: double [[A:%.*]], ptr dereferenceable(2) [[DUMMY:%.*]]) {
+; CHECK-NEXT: [[RES:%.*]] = call reassoc double @llvm.sin.f64(double [[A]])
+; CHECK-NEXT: ret double [[RES]]
+;
+ %1 = call double @llvm.tan.f64(double %a)
+ %2 = call double @llvm.cos.f64(double %a)
+ %res = fmul reassoc double %1, %2
+ ret double %res
+}
+
+define double @fmul_reassoc_tan_reassoc_cos_strict(double %a) {
+; CHECK-LABEL: define double @fmul_reassoc_tan_reassoc_cos_strict(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT: [[RES:%.*]] = call reassoc double @llvm.sin.f64(double [[A]])
+; CHECK-NEXT: ret double [[RES]]
+;
+ %1 = call reassoc double @llvm.tan.f64(double %a)
+ %2 = call double @llvm.cos.f64(double %a)
+ %res = fmul reassoc double %1, %2
+ ret double %res
+}
+
+define double @fmul_tan_cos_reassoc_multiple_uses(double %a) {
+; CHECK-LABEL: define double @fmul_tan_cos_reassoc_multiple_uses(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call reassoc double @llvm.tan.f64(double [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call reassoc double @llvm.cos.f64(double [[A]])
+; CHECK-NEXT: [[RES:%.*]] = fmul reassoc double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: call void @use(double [[TMP2]])
+; CHECK-NEXT: ret double [[RES]]
+;
+ %1 = call reassoc double @llvm.tan.f64(double %a)
+ %2 = call reassoc double @llvm.cos.f64(double %a)
+ %res = fmul reassoc double %1, %2
+ call void @use(double %2)
+ ret double %res
+}
+
+define double @fmul_tan_cos_reassoc(double %a) {
+; CHECK-LABEL: define double @fmul_tan_cos_reassoc(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT: [[RES:%.*]] = call reassoc double @llvm.sin.f64(double [[A]])
+; CHECK-NEXT: ret double [[RES]]
+;
+ %1 = call reassoc double @llvm.tan.f64(double %a)
+ %2 = call reassoc double @llvm.cos.f64(double %a)
+ %res = fmul reassoc double %1, %2
+ ret double %res
+}
+
+define float @fmul_tanf_cosf_reassoc(float %a) {
+; CHECK-LABEL: define float @fmul_tanf_cosf_reassoc(
+; CHECK-SAME: float [[A:%.*]]) {
+; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.sin.f32(float [[A]])
+; CHECK-NEXT: ret float [[RES]]
+;
+ %1 = call reassoc float @llvm.tan.f32(float %a)
+ %2 = call reassoc float @llvm.cos.f32(float %a)
+ %res = fmul reassoc float %1, %2
+ ret float %res
+}
+
+define fp128 @fmul_tanfp128_cosfp128_reassoc(fp128 %a) {
+; CHECK-LABEL: define fp128 @fmul_tanfp128_cosfp128_reassoc(
+; CHECK-SAME: fp128 [[A:%.*]]) {
+; CHECK-NEXT: [[RES:%.*]] = call reassoc fp128 @llvm.sin.f128(fp128 [[A]])
+; CHECK-NEXT: ret fp128 [[RES]]
+;
+ %1 = call reassoc fp128 @llvm.tan.fp128(fp128 %a)
+ %2 = call reassoc fp128 @llvm.cos.fp128(fp128 %a)
+ %res = fmul reassoc fp128 %1, %2
+ ret fp128 %res
+}
+
+declare double @llvm.sin.f64(double) #1
+declare float @llvm.sin.f32(float) #1
+declare fp128 @llvm.sin.fp128(fp128) #1
+
+declare double @llvm.cos.f64(double) #1
+declare float @llvm.cos.f32(float) #1
+declare fp128 @llvm.cos.fp128(fp128) #1
+
+declare double @llvm.tan.f64(double) #1
+declare float @llvm.tan.f32(float) #1
+declare fp128 @llvm.tan.fp128(fp128) #1
+
+declare void @use(double)
+
+attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { nounwind readnone }
|
Moved the folding into InstCombinerImpl::foldFMulReassoc()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM w/ nit. Please wait for @arsenm to double confirm #136319 (comment).
The tests are fixed after change
Commuted case cos * tan and a negative test with mismatched X (tan(a) * cos(b))
@@ -947,6 +947,14 @@ Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) { | |||
return BinaryOperator::CreateFMulFMF(XX, Y, &I); | |||
} | |||
|
|||
// tan(X) * cos(X) -> sin(X) | |||
if (match(&I, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
reassoc/afn may be required, but I am not sure.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this would fall under contract, and I expect it to be precision improving
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do I understand correctly the instruction should only be checked for the contract
flag?
The diff will be 9d090b2
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't like contract
being used for this purpose. I think contract
should be more predictable and tied to hardware behavior. Historically, we have used reassoc
for algebraic transformations. If the user has enabled unsafe math optimizations broadly, this sort of optimization is what they'd expect, but if they've only enabled contract
I don't think it is.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think contract should be more predictable and tied to hardware behavior
This should have no relation to hardware behavior whatsoever. Hardware dependent semantics are simply not useful.
This is also inline with the proposal to fix the definition for the contract flag: https://discourse.llvm.org/t/rfc-fast-math-flags-semantics-contract/84478
contract should not be limited to the FMA case. If that were the intended only case to handle, it should not be a fast math flag. It needs broader applicability, and should cover cases that may increase precision
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What you are suggesting here is, IMO, a significant extension to the way the contract flag is interpreted
It's how the contract flag is already interpreted, see https://discourse.llvm.org/t/rfc-fast-math-flags-semantics-contract/84478
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are there other optimizations using it this way?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the backend we have some x / sqrt -> rsqrt contractions I wrote, I'd have to look at what other call optimizers are doing
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we're going to interpret the flag this way, the LangRef should be updated. I guess we can just deal with the fallout in clang if users complain.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Applied changes in 4715cb8
Is it resolved and PR could be merged?
Tests: vector case, flag preservation, and !fpmath metadata preservation
Removed folding from foldFMulReassoc() that assumed hasAllowReassoc() Moved folding back into visitFMul()
This patch enables folding tan(x) * cos(x) -> sin(x) under -ffast-math flag.
Alive2 proof is not able to check the transformation - https://alive2.llvm.org/ce/z/sgMYRJ
Seems (some) intrinsics are not supported by it, should mention in the doc?
The tests copy logic from https://github.com/llvm/llvm-project/blob/6c5f50f18694a4d91d7ce53a14188c54ee7c6f3b/llvm/test/Transforms/InstCombine/fdiv-sin-cos.ll
Fixes #34950.