-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[DXIL] Add sign intrinsic part 2 #101988
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DXIL] Add sign intrinsic part 2 #101988
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
e9660ac
to
a37044d
Compare
@llvm/pr-subscribers-backend-directx @llvm/pr-subscribers-llvm-ir Author: Tim Gymnich (tgymnich) Changespartially fixes #70078 Changes
Related PRsFull diff: https://github.com/llvm/llvm-project/pull/101988.diff 3 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 312c3862f240d..b39591c3211ef 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -60,4 +60,5 @@ def int_dx_imad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLV
def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_dx_rcp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_dx_rsqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty]>;
}
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index ac85859af8a53..d8e4f903db73c 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
@@ -45,6 +46,7 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::dx_length:
case Intrinsic::dx_sdot:
case Intrinsic::dx_udot:
+ case Intrinsic::dx_sign:
return true;
}
return false;
@@ -293,6 +295,36 @@ static bool expandClampIntrinsic(CallInst *Orig, Intrinsic::ID ClampIntrinsic) {
return true;
}
+static bool expandSignIntrinsic(CallInst *Orig) {
+ IRBuilder<> Builder(Orig->getParent());
+ Value *X = Orig->getOperand(0);
+ Type *Ty = X->getType();
+ Type *ScalarTy = Ty->getScalarType();
+ Type *RetTy = Orig->getType();
+ Constant *Zero = Constant::getNullValue(Ty);
+ Builder.SetInsertPoint(Orig);
+
+ Value *GT;
+ Value *LT;
+ if (ScalarTy->isFloatingPointTy()) {
+ GT = Builder.CreateFCmpOLT(Zero, X);
+ LT = Builder.CreateFCmpOLT(X, Zero);
+ } else {
+ assert(ScalarTy->isIntegerTy());
+ GT = Builder.CreateICmpSLT(Zero, X);
+ LT = Builder.CreateICmpSLT(X, Zero);
+ }
+
+ Value *ZextGT = Builder.CreateZExt(GT, RetTy);
+ Value *ZextLT = Builder.CreateZExt(LT, RetTy);
+
+ Value *Ret = Builder.CreateSub(ZextGT, ZextLT);
+
+ Orig->replaceAllUsesWith(Ret);
+ Orig->eraseFromParent();
+ return true;
+}
+
static bool expandIntrinsic(Function &F, CallInst *Orig) {
switch (F.getIntrinsicID()) {
case Intrinsic::abs:
@@ -317,6 +349,8 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::dx_sdot:
case Intrinsic::dx_udot:
return expandIntegerDot(Orig, F.getIntrinsicID());
+ case Intrinsic::dx_sign:
+ return expandSignIntrinsic(Orig);
}
return false;
}
diff --git a/llvm/test/CodeGen/DirectX/sign.ll b/llvm/test/CodeGen/DirectX/sign.ll
new file mode 100644
index 0000000000000..2d9254a3abc77
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/sign.ll
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
+
+
+define noundef i32 @sign_half(half noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_half(
+; CHECK-SAME: half noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt half 0xH0000, [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt half [[A]], 0xH0000
+; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+entry:
+ %elt.sign = call i32 @llvm.dx.sign.f16(half %a)
+ ret i32 %elt.sign
+}
+
+define noundef i32 @sign_float(float noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_float(
+; CHECK-SAME: float noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt float 0.000000e+00, [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt float [[A]], 0.000000e+00
+; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+entry:
+ %elt.sign = call i32 @llvm.dx.sign.f32(float %a)
+ ret i32 %elt.sign
+}
+
+define noundef i32 @sign_double(double noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_double(
+; CHECK-SAME: double noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt double 0.000000e+00, [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A]], 0.000000e+00
+; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+entry:
+ %elt.sign = call i32 @llvm.dx.sign.f64(double %a)
+ ret i32 %elt.sign
+}
+
+define noundef i32 @sign_i16(i16 noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_i16(
+; CHECK-SAME: i16 noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i16 0, [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[A]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+entry:
+ %elt.sign = call i32 @llvm.dx.sign.i16(i16 %a)
+ ret i32 %elt.sign
+}
+
+define noundef i32 @sign_i32(i32 noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_i32(
+; CHECK-SAME: i32 noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 0, [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+entry:
+ %elt.sign = call i32 @llvm.dx.sign.i32(i32 %a)
+ ret i32 %elt.sign
+}
+
+define noundef i32 @sign_i64(i64 noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_i64(
+; CHECK-SAME: i64 noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 0, [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[A]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+entry:
+ %elt.sign = call i32 @llvm.dx.sign.i64(i64 %a)
+ ret i32 %elt.sign
+}
+
+define noundef <4 x i32> @sign_half_vector(<4 x half> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_half_vector(
+; CHECK-SAME: <4 x half> noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x half> zeroinitializer, [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x half> [[A]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
+;
+entry:
+ %elt.sign = call <4 x i32> @llvm.dx.sign.v4f16(<4 x half> %a)
+ ret <4 x i32> %elt.sign
+}
+
+define noundef <4 x i32> @sign_float_vector(<4 x float> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_float_vector(
+; CHECK-SAME: <4 x float> noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> zeroinitializer, [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x float> [[A]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
+;
+entry:
+ %elt.sign = call <4 x i32> @llvm.dx.sign.v4f32(<4 x float> %a)
+ ret <4 x i32> %elt.sign
+}
+
+define noundef <4 x i32> @sign_double_vector(<4 x double> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_double_vector(
+; CHECK-SAME: <4 x double> noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x double> zeroinitializer, [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x double> [[A]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
+;
+entry:
+ %elt.sign = call <4 x i32> @llvm.dx.sign.v4f64(<4 x double> %a)
+ ret <4 x i32> %elt.sign
+}
+
+define noundef <4 x i32> @sign_i16_vector(<4 x i16> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_i16_vector(
+; CHECK-SAME: <4 x i16> noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i16> zeroinitializer, [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i16> [[A]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
+;
+entry:
+ %elt.sign = call <4 x i32> @llvm.dx.sign.v4i16(<4 x i16> %a)
+ ret <4 x i32> %elt.sign
+}
+
+define noundef <4 x i32> @sign_i32_vector(<4 x i32> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_i32_vector(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> zeroinitializer, [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[A]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
+;
+entry:
+ %elt.sign = call <4 x i32> @llvm.dx.sign.v4i32(<4 x i32> %a)
+ ret <4 x i32> %elt.sign
+}
+
+define noundef <4 x i32> @sign_i64_vector(<4 x i64> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_i64_vector(
+; CHECK-SAME: <4 x i64> noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i64> zeroinitializer, [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i64> [[A]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
+;
+entry:
+ %elt.sign = call <4 x i32> @llvm.dx.sign.v4i64(<4 x i64> %a)
+ ret <4 x i32> %elt.sign
+}
+
+
+declare i32 @llvm.dx.sign.f16(half)
+declare i32 @llvm.dx.sign.f32(float)
+declare i32 @llvm.dx.sign.f64(double)
+
+declare i32 @llvm.dx.sign.i16(i16)
+declare i32 @llvm.dx.sign.i32(i32)
+declare i32 @llvm.dx.sign.i64(i64)
+
+declare <4 x i32> @llvm.dx.sign.v4f16(<4 x half>)
+declare <4 x i32> @llvm.dx.sign.v4f32(<4 x float>)
+declare <4 x i32> @llvm.dx.sign.v4f64(<4 x double>)
+
+declare <4 x i32> @llvm.dx.sign.v4i16(<4 x i16>)
+declare <4 x i32> @llvm.dx.sign.v4i32(<4 x i32>)
+declare <4 x i32> @llvm.dx.sign.v4i64(<4 x i64>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; DOPCHECK: {{.*}}
+; EXPCHECK: {{.*}}
|
Type *ScalarTy = Ty->getScalarType(); | ||
Type *RetTy = Orig->getType(); | ||
Constant *Zero = Constant::getNullValue(Ty); | ||
Builder.SetInsertPoint(Orig); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks correct to me and is pretty much what we do in DXC via HLOperationLower.cpp
's TranslateSign
. A small nitpick I like to keep the the builder setup code close together ie
IRBuilder<> Builder(Orig->getParent());
Builder.SetInsertPoint(Orig);
Second from the way you are going about it, it looks like you will need a second intrinsic for unsigned sign expansion. so a Intrinsic::dx_usign
before we can close out the ticket. Is that your understanding aswell?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have not yet decided how to go about this yet.
We can either add usign
builtins/intrinsics all the way down from hlsl to dxil and spirv. Or just add a high level builtin to hlsl.
I don't like the idea of having an intrinsic for select(x == 0, 0, 1)
.
I wonder from a consistency point of view if it is even necessary to have a usign
builtin since abs
also only seems to offer the signed variants in hlsl_intrinsics.h
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
part 1 fixed in 61bc2a8
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i'm not opposed to doing a high level operation for unsigned sign
cases, however for reference this is what DXC does: https://github.com/microsoft/DirectXShaderCompiler/blob/0e7591a6ee94c8c8eb0d536ce7815fd56a776451/lib/HLSL/HLOperationLower.cpp#L2703C1-L2714C2
As for abs
its a no-op in code gen:
https://github.com/microsoft/DirectXShaderCompiler/blob/0e7591a6ee94c8c8eb0d536ce7815fd56a776451/lib/HLSL/HLOperationLower.cpp#L1666C1-L1671C2
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's do the high level op then.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A small nitpick I like to keep the the builder setup code close together ie
IRBuilder<> Builder(Orig->getParent()); Builder.SetInsertPoint(Orig);
I didn't see any discussion about this nor any changes in code. Also peculiar to me is the resistance to use the more direct approach which is to set the insertion point with the constructor. Instead of setting it to the start of the basic block and then resetting it, you can just set it with Orig
and be done with it.
IRBuilder<> Builder(Orig);
@tgymnich you got approval on this PR, is there something you are waiting on? |
@farzonl This seems good to go. I cannot merge this myself though. I will take some time later today to rebase this PR on main. |
@farzonl PR should be good to go now. I don't have commit access, so I'd appreciate if you could merge this for me. |
Let me get one more reviewer from my team to take a look and then I'll merge it for you. Thanks again. Also thank you for your patience. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I won't insist about the IRBuilder constructor issue, but the end of the test checks confuse me
Type *ScalarTy = Ty->getScalarType(); | ||
Type *RetTy = Orig->getType(); | ||
Constant *Zero = Constant::getNullValue(Ty); | ||
Builder.SetInsertPoint(Orig); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A small nitpick I like to keep the the builder setup code close together ie
IRBuilder<> Builder(Orig->getParent()); Builder.SetInsertPoint(Orig);
I didn't see any discussion about this nor any changes in code. Also peculiar to me is the resistance to use the more direct approach which is to set the insertion point with the constructor. Instead of setting it to the start of the basic block and then resetting it, you can just set it with Orig
and be done with it.
IRBuilder<> Builder(Orig);
llvm/test/CodeGen/DirectX/sign.ll
Outdated
declare <4 x i32> @llvm.dx.sign.v4i16(<4 x i16>) | ||
declare <4 x i32> @llvm.dx.sign.v4i32(<4 x i32>) | ||
declare <4 x i32> @llvm.dx.sign.v4i64(<4 x i64>) | ||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't follow. Is this file autogenerated? The checks below don't seem to be useful.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
removed. I might have run the test updater over this file at some point in the past.
@pow2clk I removed the unnecessary checks and fixed the |
78c8d82
to
c1c5d5c
Compare
I don't have commit access so feel free to merge this for me. |
I will. Our current policy on HLSL is two approvals. So I’m waiting for a second one. |
Thank you @tgymnich. Congrats on your first DirectX contribution! |
partially fixes #70078 ### Changes - Implemented `sign` clang builtin - Linked `sign` clang builtin with `hlsl_intrinsics.h` - Added sema checks for `sign` to `CheckHLSLBuiltinFunctionCall` in `SemaChecking.cpp` - Add codegen for `sign` to `EmitHLSLBuiltinExpr` in `CGBuiltin.cpp` - Add codegen tests to `clang/test/CodeGenHLSL/builtins/sign.hlsl` - Add sema tests to `clang/test/SemaHLSL/BuiltIns/sign-errors.hlsl` ### Related PRs - #101987 - #101988 ### Discussion - Should there be a `usign` intrinsic that handles the unsigned cases?
- Add handling for unsigned integers to hlsl_elementwise_sign - Use `select` instead of adding dx and spirv intrinsics for unsigned integers as [discussed previously ](#101988 (comment)) fixes #70078 ### Related PRs - #101987 - #101988 - #101989 cc @farzonl @pow2clk @bob80905 @bogner @llvm-beanz
- Add handling for unsigned integers to hlsl_elementwise_sign - Use `select` instead of adding dx and spirv intrinsics for unsigned integers as [discussed previously ](llvm#101988 (comment)) fixes llvm#70078 ### Related PRs - llvm#101987 - llvm#101988 - llvm#101989 cc @farzonl @pow2clk @bob80905 @bogner @llvm-beanz
makes progress on #70078
Changes
int_dx_sign
intrinsic inIntrinsicsDirectX.td
int_dx_sign in
DXILIntrinsicExpansion.cpp`Related PRs