Skip to content

Reland: [DirectX] Add atan2 intrinsic and expand for DXIL backend (p1) #109878

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions llvm/docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15583,6 +15583,43 @@ trapping or setting ``errno``.
When specified with the fast-math-flag 'afn', the result may be approximated
using a less accurate calculation.

'``llvm.atan2.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""

This is an overloaded intrinsic. You can use ``llvm.atan2`` on any
floating-point or vector of floating-point type. Not all targets support
all types however.

::

declare float @llvm.atan2.f32(float %Y, float %X)
declare double @llvm.atan2.f64(double %Y, double %X)
declare x86_fp80 @llvm.atan2.f80(x86_fp80 %Y, x86_fp80 %X)
declare fp128 @llvm.atan2.f128(fp128 %Y, fp128 %X)
declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %Y, ppc_fp128 %X)

Overview:
"""""""""

The '``llvm.atan2.*``' intrinsics return the arctangent of ``Y/X``, using the signs of both operands to determine the quadrant of the result.

Arguments:
""""""""""

The arguments and return value are floating-point numbers of the same type.

Semantics:
""""""""""

Return the same value as a corresponding libm '``atan2``' function but without
trapping or setting ``errno``.

When specified with the fast-math-flag 'afn', the result may be approximated
using a less accurate calculation.

'``llvm.sinh.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -1016,6 +1016,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
def int_asin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_acos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_atan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_atan2 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_sin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_cos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_tan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
Expand Down
52 changes: 52 additions & 0 deletions llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ using namespace llvm;
static bool isIntrinsicExpansion(Function &F) {
switch (F.getIntrinsicID()) {
case Intrinsic::abs:
case Intrinsic::atan2:
case Intrinsic::exp:
case Intrinsic::log:
case Intrinsic::log10:
Expand Down Expand Up @@ -307,6 +308,54 @@ static Value *expandNormalizeIntrinsic(CallInst *Orig) {
return Builder.CreateFMul(X, MultiplicandVec);
}

/// Expand a call to llvm.atan2(y, x) into an fdiv, one llvm.atan call and a
/// chain of selects that applies the quadrant correction.
///
/// Operand 0 of \p Orig is treated as y and operand 1 as x. All new
/// instructions are created with a builder positioned at \p Orig and carrying
/// its fast-math flags.
///
/// \returns the value of the final select in the chain; the caller is
/// responsible for replacing \p Orig with it.
static Value *expandAtan2Intrinsic(CallInst *Orig) {
  Value *Y = Orig->getOperand(0);
  Value *X = Orig->getOperand(1);
  Type *Ty = X->getType();

  IRBuilder<> Builder(Orig);
  Builder.setFastMathFlags(Orig->getFastMathFlags());

  // Base angle: atan(y / x).
  Value *Quotient = Builder.CreateFDiv(Y, X);
  CallInst *BaseAngle = Builder.CreateIntrinsic(Ty, Intrinsic::atan, {Quotient},
                                                nullptr, "Elt.Atan");
  BaseAngle->setTailCall(Orig->isTailCall());
  // NOTE(review): this copies the attribute list of a two-argument call onto a
  // one-argument call; confirm that attributes on the second argument cannot
  // reach here.
  BaseAngle->setAttributes(Orig->getAttributes());

  // Constants used by the quadrant correction described at
  // https://en.wikipedia.org/wiki/Atan2.
  Constant *Zero = ConstantFP::get(Ty, 0);
  Constant *Pi = ConstantFP::get(Ty, llvm::numbers::pi);
  Constant *PosHalfPi = ConstantFP::get(Ty, llvm::numbers::pi / 2);
  Constant *NegHalfPi = ConstantFP::get(Ty, -llvm::numbers::pi / 2);

  Value *AnglePlusPi = Builder.CreateFAdd(BaseAngle, Pi);
  Value *AngleMinusPi = Builder.CreateFSub(BaseAngle, Pi);

  // All comparisons are ordered, so none of them holds for a NaN operand.
  Value *XIsNeg = Builder.CreateFCmpOLT(X, Zero);
  Value *XIsZero = Builder.CreateFCmpOEQ(X, Zero);
  Value *YIsNonNeg = Builder.CreateFCmpOGE(Y, Zero);
  Value *YIsNeg = Builder.CreateFCmpOLT(Y, Zero);

  // Each entry overrides the running result when both of its conditions hold.
  // The entries are emitted in this order, one and/select pair per entry.
  const struct {
    Value *XCond;
    Value *YCond;
    Value *Override;
  } Corrections[] = {
      {XIsNeg, YIsNonNeg, AnglePlusPi},  // x < 0,  y >= 0 -> atan + pi
      {XIsNeg, YIsNeg, AngleMinusPi},    // x < 0,  y < 0  -> atan - pi
      {XIsZero, YIsNeg, NegHalfPi},      // x == 0, y < 0  -> -pi/2
      {XIsZero, YIsNonNeg, PosHalfPi},   // x == 0, y >= 0 -> pi/2
  };

  // When no entry applies (including x > 0) the plain atan result is kept.
  Value *Angle = BaseAngle;
  for (const auto &Case : Corrections) {
    Value *BothHold = Builder.CreateAnd(Case.XCond, Case.YCond);
    Angle = Builder.CreateSelect(BothHold, Case.Override, Angle);
  }

  return Angle;
}

static Value *expandPowIntrinsic(CallInst *Orig) {

Value *X = Orig->getOperand(0);
Expand Down Expand Up @@ -418,6 +467,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::abs:
Result = expandAbs(Orig);
break;
case Intrinsic::atan2:
Result = expandAtan2Intrinsic(Orig);
break;
case Intrinsic::exp:
Result = expandExpIntrinsic(Orig);
break;
Expand Down
87 changes: 87 additions & 0 deletions llvm/test/CodeGen/DirectX/atan2.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK

; Make sure the correct DXIL expansions for atan2 are generated for float, half and <4 x float>.

; Scalar float case, verified by both RUN lines. The expected sequence is an
; fdiv of %y by %x feeding a single atan call (the llvm.atan intrinsic when only
; expansion runs, the DXIL unary op with opcode 17 once op lowering also runs),
; followed by the quadrant-correction selects. 0x400921FB60000000,
; 0xBFF921FB60000000 and 0x3FF921FB60000000 are pi, -pi/2 and pi/2 at float
; precision.
define noundef float @atan2_float(float noundef %y, float noundef %x) {
entry:
; CHECK: [[DIV:%.+]] = fdiv float %y, %x
; EXPCHECK: [[ATAN:%.+]] = call float @llvm.atan.f32(float [[DIV]])
; DOPCHECK: [[ATAN:%.+]] = call float @dx.op.unary.f32(i32 17, float [[DIV]])
; CHECK-DAG: [[ADD_PI:%.+]] = fadd float [[ATAN]], 0x400921FB60000000
; CHECK-DAG: [[SUB_PI:%.+]] = fsub float [[ATAN]], 0x400921FB60000000
; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt float %x, 0.000000e+00
; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq float %x, 0.000000e+00
; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge float %y, 0.000000e+00
; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt float %y, 0.000000e+00
; The six directives above are order-independent; from here on the and/select
; chain must appear in exactly this order, each select falling back to the
; previous one.
; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]]
; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], float [[ADD_PI]], float [[ATAN]]
; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]]
; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], float [[SUB_PI]], float [[SELECT_ADD_PI]]
; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]]
; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], float 0xBFF921FB60000000, float [[SELECT_SUB_PI]]
; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]]
; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], float 0x3FF921FB60000000, float [[SELECT_NEGHPI]]
; CHECK: ret float [[SELECT_HPI]]
%elt.atan2 = call float @llvm.atan2.f32(float %y, float %x)
ret float %elt.atan2
}

; Scalar half case, verified by both RUN lines. Same shape as the float test:
; fdiv, one atan call (llvm.atan.f16 after expansion only, DXIL unary opcode 17
; after op lowering), then the quadrant-correction selects. 0xH4248, 0xHBE48 and
; 0xH3E48 are pi, -pi/2 and pi/2 at half precision.
define noundef half @atan2_half(half noundef %y, half noundef %x) {
entry:
; CHECK: [[DIV:%.+]] = fdiv half %y, %x
; EXPCHECK: [[ATAN:%.+]] = call half @llvm.atan.f16(half [[DIV]])
; DOPCHECK: [[ATAN:%.+]] = call half @dx.op.unary.f16(i32 17, half [[DIV]])
; CHECK-DAG: [[ADD_PI:%.+]] = fadd half [[ATAN]], 0xH4248
; CHECK-DAG: [[SUB_PI:%.+]] = fsub half [[ATAN]], 0xH4248
; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt half %x, 0xH0000
; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq half %x, 0xH0000
; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge half %y, 0xH0000
; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt half %y, 0xH0000
; The six directives above are order-independent; the and/select chain below
; must appear in exactly this order.
; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]]
; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], half [[ADD_PI]], half [[ATAN]]
; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]]
; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], half [[SUB_PI]], half [[SELECT_ADD_PI]]
; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]]
; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], half 0xHBE48, half [[SELECT_SUB_PI]]
; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]]
; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], half 0xH3E48, half [[SELECT_NEGHPI]]
; CHECK: ret half [[SELECT_HPI]]
%elt.atan2 = call half @llvm.atan2.f16(half %y, half %x)
ret half %elt.atan2
}

; <4 x float> case. The expansion-only run is matched in full against the
; vector form of the sequence used in the scalar tests (splat constants,
; <4 x i1> conditions). The run that also scalarizes and lowers only counts the
; resulting scalar DXIL atan calls.
define noundef <4 x float> @atan2_float4(<4 x float> noundef %y, <4 x float> noundef %x) {
entry:
; Just Expansion, no scalarization or lowering:
; EXPCHECK: [[DIV:%.+]] = fdiv <4 x float> %y, %x
; EXPCHECK: [[ATAN:%.+]] = call <4 x float> @llvm.atan.v4f32(<4 x float> [[DIV]])
; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <4 x float> [[ATAN]], <float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000>
; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <4 x float> [[ATAN]], <float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000>
; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <4 x float> %x, zeroinitializer
; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <4 x float> %x, zeroinitializer
; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <4 x float> %y, zeroinitializer
; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <4 x float> %y, zeroinitializer
; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_GE_0]]
; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <4 x i1> [[XLT0_AND_YGE0]], <4 x float> [[ADD_PI]], <4 x float> [[ATAN]]
; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_LT_0]]
; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <4 x i1> [[XLT0_AND_YLT0]], <4 x float> [[SUB_PI]], <4 x float> [[SELECT_ADD_PI]]
; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_LT_0]]
; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <4 x i1> [[XEQ0_AND_YLT0]], <4 x float> <float 0xBFF921FB60000000, float 0xBFF921FB60000000, float 0xBFF921FB60000000, float 0xBFF921FB60000000>, <4 x float> [[SELECT_SUB_PI]]
; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_GE_0]]
; EXPCHECK: [[SELECT_HPI:%.+]] = select <4 x i1> [[XEQ0_AND_YGE0]], <4 x float> <float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0x3FF921FB60000000>, <4 x float> [[SELECT_NEGHPI]]
; EXPCHECK: ret <4 x float> [[SELECT_HPI]]

; Scalarization occurs after expansion, so atan scalarization is tested separately.
; Expansion, scalarization and lowering:
; Just make sure this expands to exactly 4 scalar DXIL atan (OpCode=17) calls.
; The count directive requires four matches; the negative directive after it
; rejects a fifth.
; DOPCHECK-COUNT-4: call float @dx.op.unary.f32(i32 17, float %{{.*}})
; DOPCHECK-NOT: call float @dx.op.unary.f32(i32 17,

%elt.atan2 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %y, <4 x float> %x)
ret <4 x float> %elt.atan2
}

declare half @llvm.atan2.f16(half, half)
declare float @llvm.atan2.f32(float, float)
declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>)
11 changes: 11 additions & 0 deletions llvm/test/CodeGen/DirectX/atan2_error.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s

; DXIL operation atan does not support double overload type
; CHECK: in function atan2_double
; CHECK-SAME: Cannot create ATan operation: Invalid overload type

; Negative test input: a double-typed atan2 call. The RUN line wraps opt in
; `not`, so the test passes only if opt fails, and the directives above require
; the failure to name this function and the ATan overload error.
define noundef double @atan2_double(double noundef %a, double noundef %b) #0 {
entry:
%1 = call double @llvm.atan2.f64(double %a, double %b)
ret double %1
}
Loading