Skip to content

Emit constrained atan2 intrinsic for clang builtin #113636

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -227,10 +227,10 @@ def FminimumNumF16F128 : Builtin, F16F128MathTemplate {
let Prototype = "T(T, T)";
}

def Atan2F128 : Builtin {
let Spellings = ["__builtin_atan2f128"];
def Atan2F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_atan2"];
let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
let Prototype = "T(T, T)";
}

def CopysignF16 : Builtin {
Expand Down
12 changes: 12 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2798,6 +2798,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
*this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));

case Builtin::BIatan2:
case Builtin::BIatan2f:
case Builtin::BIatan2l:
case Builtin::BI__builtin_atan2:
case Builtin::BI__builtin_atan2f:
case Builtin::BI__builtin_atan2f16:
case Builtin::BI__builtin_atan2l:
case Builtin::BI__builtin_atan2f128:
return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
*this, E, Intrinsic::atan2,
Intrinsic::experimental_constrained_atan2));

case Builtin::BIceil:
case Builtin::BIceilf:
case Builtin::BIceill:
Expand Down
14 changes: 7 additions & 7 deletions clang/test/CodeGen/X86/math-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,18 +45,18 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {

__builtin_atan2(f,f); __builtin_atan2f(f,f) ; __builtin_atan2l(f, f); __builtin_atan2f128(f,f);

// NO__ERRNO: declare double @atan2(double noundef, double noundef) [[READNONE:#[0-9]+]]
// NO__ERRNO: declare float @atan2f(float noundef, float noundef) [[READNONE]]
// NO__ERRNO: declare x86_fp80 @atan2l(x86_fp80 noundef, x86_fp80 noundef) [[READNONE]]
// NO__ERRNO: declare fp128 @atan2f128(fp128 noundef, fp128 noundef) [[READNONE]]
// NO__ERRNO: declare double @llvm.atan2.f64(double, double) [[READNONE_INTRINSIC:#[0-9]+]]
// NO__ERRNO: declare float @llvm.atan2.f32(float, float) [[READNONE_INTRINSIC]]
// NO__ERRNO: declare x86_fp80 @llvm.atan2.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
// NO__ERRNO: declare fp128 @llvm.atan2.f128(fp128, fp128) [[READNONE_INTRINSIC]]
// HAS_ERRNO: declare double @atan2(double noundef, double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare float @atan2f(float noundef, float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @atan2l(x86_fp80 noundef, x86_fp80 noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare fp128 @atan2f128(fp128 noundef, fp128 noundef) [[NOT_READNONE]]

__builtin_copysign(f,f); __builtin_copysignf(f,f); __builtin_copysignl(f,f); __builtin_copysignf128(f,f);

// NO__ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC:#[0-9]+]]
// NO__ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC]]
// NO__ERRNO: declare float @llvm.copysign.f32(float, float) [[READNONE_INTRINSIC]]
// NO__ERRNO: declare x86_fp80 @llvm.copysign.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
// NO__ERRNO: declare fp128 @llvm.copysign.f128(fp128, fp128) [[READNONE_INTRINSIC]]
Expand Down Expand Up @@ -179,7 +179,7 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {

__builtin_acosh(f); __builtin_acoshf(f); __builtin_acoshl(f); __builtin_acoshf128(f);

// NO__ERRNO: declare double @acosh(double noundef) [[READNONE]]
// NO__ERRNO: declare double @acosh(double noundef) [[READNONE:#[0-9]+]]
// NO__ERRNO: declare float @acoshf(float noundef) [[READNONE]]
// NO__ERRNO: declare x86_fp80 @acoshl(x86_fp80 noundef) [[READNONE]]
// NO__ERRNO: declare fp128 @acoshf128(fp128 noundef) [[READNONE]]
Expand Down Expand Up @@ -721,10 +721,10 @@ __builtin_trunc(f); __builtin_truncf(f); __builtin_truncl(f); __builtin
// HAS_ERRNO: declare fp128 @llvm.trunc.f128(fp128) [[READNONE_INTRINSIC]]
};

// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} }
// NO__ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} }
// NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} }
// NO__ERRNO: attributes [[PURE]] = { {{.*}}memory(read){{.*}} }
// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} }

// HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} }
// HAS_ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} }
Expand Down
7 changes: 7 additions & 0 deletions clang/test/CodeGen/constrained-math-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ __builtin_atan(f); __builtin_atanf(f); __builtin_atanl(f); __builti
// CHECK: call x86_fp80 @llvm.experimental.constrained.atan.f80(x86_fp80 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK: call fp128 @llvm.experimental.constrained.atan.f128(fp128 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")

__builtin_atan2(f,f); __builtin_atan2f(f,f); __builtin_atan2l(f,f); __builtin_atan2f128(f,f);

// CHECK: call double @llvm.experimental.constrained.atan2.f64(double %{{.*}}, double %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK: call float @llvm.experimental.constrained.atan2.f32(float %{{.*}}, float %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK: call x86_fp80 @llvm.experimental.constrained.atan2.f80(x86_fp80 %{{.*}}, x86_fp80 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK: call fp128 @llvm.experimental.constrained.atan2.f128(fp128 %{{.*}}, fp128 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")

__builtin_ceil(f); __builtin_ceilf(f); __builtin_ceill(f); __builtin_ceilf128(f);

// CHECK: call double @llvm.experimental.constrained.ceil.f64(double %{{.*}}, metadata !"fpexcept.strict")
Expand Down
7 changes: 3 additions & 4 deletions clang/test/CodeGen/libcalls.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ void test_builtins(double d, float f, long double ld) {
double atan2_ = atan2(d, 2);
long double atan2l_ = atan2l(ld, ld);
float atan2f_ = atan2f(f, f);
// CHECK-NO: declare double @atan2(double noundef, double noundef) [[NUW_RN:#[0-9]+]]
// CHECK-NO: declare x86_fp80 @atan2l(x86_fp80 noundef, x86_fp80 noundef) [[NUW_RN]]
// CHECK-NO: declare float @atan2f(float noundef, float noundef) [[NUW_RN]]
// CHECK-NO: declare double @llvm.atan2.f64(double, double) [[NUW_RNI]]
// CHECK-NO: declare x86_fp80 @llvm.atan2.f80(x86_fp80, x86_fp80) [[NUW_RNI]]
// CHECK-NO: declare float @llvm.atan2.f32(float, float) [[NUW_RNI]]
// CHECK-YES: declare double @atan2(double noundef, double noundef) [[NUW]]
// CHECK-YES: declare x86_fp80 @atan2l(x86_fp80 noundef, x86_fp80 noundef) [[NUW]]
// CHECK-YES: declare float @atan2f(float noundef, float noundef) [[NUW]]
Expand All @@ -124,5 +124,4 @@ void test_builtins(double d, float f, long double ld) {
}

// CHECK-YES: attributes [[NUW]] = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" }
// CHECK-NO-DAG: attributes [[NUW_RN]] = { nounwind willreturn memory(none){{.*}} }
// CHECK-NO-DAG: attributes [[NUW_RNI]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
20 changes: 10 additions & 10 deletions clang/test/CodeGen/math-libcalls.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,19 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {

atan2(f,f); atan2f(f,f) ; atan2l(f, f);

// NO__ERRNO: declare double @atan2(double noundef, double noundef) [[READNONE:#[0-9]+]]
// NO__ERRNO: declare float @atan2f(float noundef, float noundef) [[READNONE]]
// NO__ERRNO: declare x86_fp80 @atan2l(x86_fp80 noundef, x86_fp80 noundef) [[READNONE]]
// NO__ERRNO: declare double @llvm.atan2.f64(double, double) [[READNONE_INTRINSIC:#[0-9]+]]
// NO__ERRNO: declare float @llvm.atan2.f32(float, float) [[READNONE_INTRINSIC]]
// NO__ERRNO: declare x86_fp80 @llvm.atan2.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
// HAS_ERRNO: declare double @atan2(double noundef, double noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare float @atan2f(float noundef, float noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @atan2l(x86_fp80 noundef, x86_fp80 noundef) [[NOT_READNONE]]
// HAS_MAYTRAP: declare double @atan2(double noundef, double noundef) [[NOT_READNONE:#[0-9]+]]
// HAS_MAYTRAP: declare float @atan2f(float noundef, float noundef) [[NOT_READNONE]]
// HAS_MAYTRAP: declare x86_fp80 @atan2l(x86_fp80 noundef, x86_fp80 noundef) [[NOT_READNONE]]
// HAS_MAYTRAP: declare double @llvm.experimental.constrained.atan2.f64(
// HAS_MAYTRAP: declare float @llvm.experimental.constrained.atan2.f32(
// HAS_MAYTRAP: declare x86_fp80 @llvm.experimental.constrained.atan2.f80(

copysign(f,f); copysignf(f,f);copysignl(f,f);

// NO__ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC:#[0-9]+]]
// NO__ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC]]
// NO__ERRNO: declare float @llvm.copysign.f32(float, float) [[READNONE_INTRINSIC]]
// NO__ERRNO: declare x86_fp80 @llvm.copysign.f80(x86_fp80, x86_fp80) [[READNONE_INTRINSIC]]
// HAS_ERRNO: declare double @llvm.copysign.f64(double, double) [[READNONE_INTRINSIC:#[0-9]+]]
Expand Down Expand Up @@ -65,13 +65,13 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
// HAS_ERRNO: declare double @frexp(double noundef, ptr noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare float @frexpf(float noundef, ptr noundef) [[NOT_READNONE]]
// HAS_ERRNO: declare x86_fp80 @frexpl(x86_fp80 noundef, ptr noundef) [[NOT_READNONE]]
// HAS_MAYTRAP: declare double @frexp(double noundef, ptr noundef) [[NOT_READNONE]]
// HAS_MAYTRAP: declare double @frexp(double noundef, ptr noundef) [[NOT_READNONE:#[0-9]+]]
// HAS_MAYTRAP: declare float @frexpf(float noundef, ptr noundef) [[NOT_READNONE]]
// HAS_MAYTRAP: declare x86_fp80 @frexpl(x86_fp80 noundef, ptr noundef) [[NOT_READNONE]]

ldexp(f,f); ldexpf(f,f); ldexpl(f,f);

// NO__ERRNO: declare double @ldexp(double noundef, i32 noundef) [[READNONE]]
// NO__ERRNO: declare double @ldexp(double noundef, i32 noundef) [[READNONE:#[0-9]+]]
// NO__ERRNO: declare float @ldexpf(float noundef, i32 noundef) [[READNONE]]
// NO__ERRNO: declare x86_fp80 @ldexpl(x86_fp80 noundef, i32 noundef) [[READNONE]]
// HAS_ERRNO: declare double @ldexp(double noundef, i32 noundef) [[NOT_READNONE]]
Expand Down Expand Up @@ -719,9 +719,9 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
// HAS_ERRNO: declare x86_fp80 @llvm.trunc.f80(x86_fp80) [[READNONE_INTRINSIC]]
};

// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} }
// NO__ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} }
// NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} }
// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} }
// NO__ERRNO: attributes [[READONLY]] = { {{.*}}memory(read){{.*}} }

// HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} }
Expand Down
16 changes: 8 additions & 8 deletions clang/test/CodeGenCXX/builtin-calling-conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ using size_t = unsigned long;
#endif // SPIR
} // namespace std

float __builtin_atan2f(float, float);
float __builtin_erff(float);
void *operator new(std::size_t);
#endif // REDECL

Expand All @@ -22,32 +22,32 @@ void foo();
void user() {
int i;
::operator new(5);
(void)__builtin_atan2f(1.1, 2.2);
(void)__builtin_erff(1.1);
foo();
}

// LINUX: define{{.*}} void @_Z4userv()
// LINUX: call noalias noundef nonnull ptr @_Znwm
// LINUX: call float @atan2f
// LINUX: call float @erff
// LINUX: call void @_Z3foov
// LINUX: declare noundef nonnull ptr @_Znwm(i64 noundef)
// LINUX: declare float @atan2f(float noundef, float noundef)
// LINUX: declare float @erff(float noundef)
// LINUX: declare void @_Z3foov()

// SPIR: define{{.*}} spir_func void @_Z4userv()
// SPIR: call spir_func noalias noundef nonnull ptr @_Znwj
// SPIR: call spir_func float @atan2f
// SPIR: call spir_func float @erff
// SPIR: call spir_func void @_Z3foov
// SPIR: declare spir_func noundef nonnull ptr @_Znwj(i32 noundef)
// SPIR: declare spir_func float @atan2f(float noundef, float noundef)
// SPIR: declare spir_func float @erff(float noundef)
// SPIR: declare spir_func void @_Z3foov()

// Note: Windows /G options should not change the platform default calling
// convention of builtins.
// WIN32: define dso_local x86_stdcallcc void @"?user@@YGXXZ"()
// WIN32: call noalias noundef nonnull ptr @"??2@YAPAXI@Z"
// WIN32: call float @atan2f
// WIN32: call float @erff
// WIN32: call x86_stdcallcc void @"?foo@@YGXXZ"
// WIN32: declare dso_local noundef nonnull ptr @"??2@YAPAXI@Z"(
// WIN32: declare dso_local float @atan2f(float noundef, float noundef)
// WIN32: declare dso_local float @erff(float noundef)
// WIN32: declare dso_local x86_stdcallcc void @"?foo@@YGXXZ"()
3 changes: 3 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-f16.cl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ void test_half_builtins(half h0, half h1, half h2, int i0) {
// CHECK: call half @llvm.atan.f16(half %h0)
res = __builtin_atanf16(h0);

// CHECK: call half @llvm.atan2.f16(half %h0, half %h1)
res = __builtin_atan2f16(h0, h1);

// CHECK: call half @llvm.copysign.f16(half %h0, half %h1)
res = __builtin_copysignf16(h0, h1);

Expand Down
49 changes: 43 additions & 6 deletions llvm/docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15748,16 +15748,17 @@ all types however.

::

declare float @llvm.atan2.f32(float %X, float %Y)
declare double @llvm.atan2.f64(double %X, double %Y)
declare x86_fp80 @llvm.atan2.f80(x86_fp80 %X, x86_fp80 %Y)
declare fp128 @llvm.atan2.f128(fp128 %X, fp128 %Y)
declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %X, ppc_fp128 %Y)
declare float @llvm.atan2.f32(float %Y, float %X)
declare double @llvm.atan2.f64(double %Y, double %X)
declare x86_fp80 @llvm.atan2.f80(x86_fp80 %Y, x86_fp80 %X)
declare fp128 @llvm.atan2.f128(fp128 %Y, fp128 %X)
declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %Y, ppc_fp128 %X)

Overview:
"""""""""

The '``llvm.atan2.*``' intrinsics return the arctangent of the operand.
The '``llvm.atan2.*``' intrinsics return the arctangent of ``Y/X`` accounting
for the quadrant.

Arguments:
""""""""""
Expand Down Expand Up @@ -27259,6 +27260,42 @@ This function returns the arctangent of the specified operand, returning the
same values as the libm ``atan`` functions would, and handles error
conditions in the same way.

'``llvm.experimental.constrained.atan2``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""

::

declare <type>
@llvm.experimental.constrained.atan2(<type> <op1>,
<type> <op2>,
metadata <rounding mode>,
metadata <exception behavior>)

Overview:
"""""""""

The '``llvm.experimental.constrained.atan2``' intrinsic returns the arctangent
of ``<op1>`` divided by ``<op2>`` accounting for the quadrant.

Arguments:
""""""""""

The first two arguments and the return value are floating-point numbers of the
same type.

The third and fourth arguments specify the rounding mode and exception
behavior as described above.

Semantics:
""""""""""

This function returns the quadrant-specific arctangent using the specified
operands, returning the same values as the libm ``atan2`` functions would, and
handles error conditions in the same way.

'``llvm.experimental.constrained.sinh``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
16 changes: 16 additions & 0 deletions llvm/test/CodeGen/ARM/fp-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,13 @@ define float @tan_f32(float %x) #0 {
ret float %val
}

; Strict-FP atan2 on float: the constrained intrinsic call below (round to
; nearest, strict exception behavior) is expected to show up in the output as
; a call to the atan2f library routine.
; NOTE(review): the parameters are named %x, %y and are forwarded in that
; order as the first and second operand; the names themselves do not affect
; what this test verifies.
; CHECK-LABEL: atan2_f32:
; CHECK: bl atan2f
define float @atan2_f32(float %x, float %y) #0 {
%val = call float @llvm.experimental.constrained.atan2.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret float %val
}

; CHECK-LABEL: pow_f32:
; CHECK: bl powf
define float @pow_f32(float %x, float %y) #0 {
Expand Down Expand Up @@ -610,6 +617,13 @@ define double @tan_f64(double %x) #0 {
ret double %val
}

; Strict-FP atan2 on double: the constrained intrinsic call below (round to
; nearest, strict exception behavior) is expected to show up in the output as
; a call to the atan2 library routine.
; NOTE(review): the parameters are named %x, %y and are forwarded in that
; order as the first and second operand; the names themselves do not affect
; what this test verifies.
; CHECK-LABEL: atan2_f64:
; CHECK: bl atan2
define double @atan2_f64(double %x, double %y) #0 {
%val = call double @llvm.experimental.constrained.atan2.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret double %val
}

; CHECK-LABEL: pow_f64:
; CHECK: bl pow
define double @pow_f64(double %x, double %y) #0 {
Expand Down Expand Up @@ -1038,6 +1052,7 @@ declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, meta
declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.cos.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.atan2.f32(float, float, metadata, metadata)
declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata)
declare float @llvm.experimental.constrained.log.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.log10.f32(float, metadata, metadata)
Expand Down Expand Up @@ -1072,6 +1087,7 @@ declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, me
declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.log10.f64(double, metadata, metadata)
Expand Down
57 changes: 57 additions & 0 deletions llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,63 @@ for.body:
br i1 %cond, label %exit, label %for.body
}

; Verify that a constrained atan2 op inside a counted loop is converted to a
; call to atan2. The expected assembly below keeps an explicit loop counter in
; r28 (li 255 / addi -1 / cmpldi / bc) around the `bl atan2`.
define void @testAtan2(ptr %cast1, ptr %cast2) strictfp {
; CHECK-LABEL: testAtan2:
; CHECK: # %bb.0: # %root
; CHECK-NEXT: mflr 0
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r28, -32
; CHECK-NEXT: .cfi_offset r29, -24
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill
; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
; CHECK-NEXT: stdu 1, -64(1)
; CHECK-NEXT: addi 30, 3, -8
; CHECK-NEXT: addi 29, 4, -8
; CHECK-NEXT: li 28, 255
; CHECK-NEXT: std 0, 80(1)
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_1: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: lfdu 2, 8(29)
; CHECK-NEXT: lfdu 1, 8(30)
; CHECK-NEXT: bl atan2
; CHECK-NEXT: nop
; CHECK-NEXT: addi 28, 28, -1
; CHECK-NEXT: stfd 1, 0(30)
; CHECK-NEXT: cmpldi 28, 0
; CHECK-NEXT: bc 12, 1, .LBB3_1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: addi 1, 1, 64
; CHECK-NEXT: ld 0, 16(1)
; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr 0
; CHECK-NEXT: blr
root:
br label %for.body

exit:
ret void

; Loop body: %i runs from 0 and the loop exits once %next reaches 255, so 255
; pairs of doubles are processed, one from each input array per iteration.
for.body:
%i = phi i64 [ 0, %root ], [ %next, %for.body ]
%idx1 = getelementptr inbounds double, ptr %cast1, i64 %i
%idx2 = getelementptr inbounds double, ptr %cast2, i64 %i
%val1 = load double, ptr %idx1
%val2 = load double, ptr %idx2
; Despite its name, %tan holds the atan2 result (dynamic rounding, strict
; exception behavior); it is stored back over the first operand's slot.
%tan = tail call nnan ninf nsz arcp double @llvm.experimental.constrained.atan2.f64(double %val1, double %val2, metadata !"round.dynamic", metadata !"fpexcept.strict")
store double %tan, ptr %idx1, align 8
%next = add nuw nsw i64 %i, 1
%cond = icmp eq i64 %next, 255
br i1 %cond, label %exit, label %for.body
}

declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
Loading
Loading