Skip to content

Commit 5edede2

Browse files
authored
[DXIL] Add sign intrinsic part 2 (#101988)
makes progress on #70078 ### Changes - Added `int_dx_sign` intrinsic in `IntrinsicsDirectX.td` - Added expansion for `int_dx_sign` in `DXILIntrinsicExpansion.cpp` - Added DXIL backend test case ### Related PRs - #101987 - #101989
1 parent eee2f02 commit 5edede2

File tree

3 files changed

+244
-0
lines changed

3 files changed

+244
-0
lines changed

llvm/include/llvm/IR/IntrinsicsDirectX.td

+1
Original file line numberDiff line numberDiff line change
@@ -81,4 +81,5 @@ def int_dx_rcp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
8181
def int_dx_rsqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
8282

8383
def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>;
84+
// dx.sign: accepts an operand of any type and returns i32, or a vector of i32
// with the same element count when the operand is a vector.
def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty]>;
8485
}

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

+31
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/ADT/SmallVector.h"
1717
#include "llvm/CodeGen/Passes.h"
1818
#include "llvm/IR/IRBuilder.h"
19+
#include "llvm/IR/InstrTypes.h"
1920
#include "llvm/IR/Instruction.h"
2021
#include "llvm/IR/Instructions.h"
2122
#include "llvm/IR/Intrinsics.h"
@@ -48,6 +49,7 @@ static bool isIntrinsicExpansion(Function &F) {
4849
case Intrinsic::dx_fdot:
4950
case Intrinsic::dx_sdot:
5051
case Intrinsic::dx_udot:
52+
case Intrinsic::dx_sign:
5153
return true;
5254
}
5355
return false;
@@ -359,6 +361,32 @@ static Value *expandClampIntrinsic(CallInst *Orig,
359361
{MaxCall, Max}, nullptr, "dx.min");
360362
}
361363

364+
// Expands a dx.sign call into plain IR emitted immediately before Orig:
//   zext(0 < X) - zext(X < 0), computed in the call's return type.
// Per element the result is 1 for a positive operand, -1 for a negative
// operand and 0 otherwise. Returns the value produced by the final subtract;
// this function does not itself replace or erase Orig.
static Value *expandSignIntrinsic(CallInst *Orig) {
365+
Value *X = Orig->getOperand(0);
366+
Type *Ty = X->getType();
367+
Type *ScalarTy = Ty->getScalarType();
368+
Type *RetTy = Orig->getType();
369+
// Zero has the operand's full type, so for vector operands the compares
// below are element-wise against a zero vector.
Constant *Zero = Constant::getNullValue(Ty);
370+
371+
IRBuilder<> Builder(Orig);
372+
373+
// GT is "0 < X", LT is "X < 0"; both are i1 (or vector of i1) values.
Value *GT;
374+
Value *LT;
375+
if (ScalarTy->isFloatingPointTy()) {
376+
// Ordered compares: a NaN operand makes both comparisons false, so the
// final result for NaN is 0.
GT = Builder.CreateFCmpOLT(Zero, X);
377+
LT = Builder.CreateFCmpOLT(X, Zero);
378+
} else {
379+
// Anything that is not floating point must be an integer; it is compared
// as a signed value.
assert(ScalarTy->isIntegerTy());
380+
GT = Builder.CreateICmpSLT(Zero, X);
381+
LT = Builder.CreateICmpSLT(X, Zero);
382+
}
383+
384+
// Widen each boolean to 0/1 in the return type before subtracting.
Value *ZextGT = Builder.CreateZExt(GT, RetTy);
385+
Value *ZextLT = Builder.CreateZExt(LT, RetTy);
386+
387+
return Builder.CreateSub(ZextGT, ZextLT);
388+
}
389+
362390
static bool expandIntrinsic(Function &F, CallInst *Orig) {
363391
Value *Result = nullptr;
364392
Intrinsic::ID IntrinsicId = F.getIntrinsicID();
@@ -402,6 +430,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
402430
case Intrinsic::dx_udot:
403431
Result = expandIntegerDotIntrinsic(Orig, IntrinsicId);
404432
break;
433+
case Intrinsic::dx_sign:
434+
Result = expandSignIntrinsic(Orig);
435+
break;
405436
}
406437

407438
if (Result) {

llvm/test/CodeGen/DirectX/sign.ll

+212
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
3+
4+
5+
; Scalar half operand: the llvm.dx.sign.f16 call should become two
; `fcmp olt` against zero, a zext of each to i32, and a sub.
define noundef i32 @sign_half(half noundef %a) {
6+
; CHECK-LABEL: define noundef i32 @sign_half(
7+
; CHECK-SAME: half noundef [[A:%.*]]) {
8+
; CHECK-NEXT: [[ENTRY:.*:]]
9+
; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt half 0xH0000, [[A]]
10+
; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt half [[A]], 0xH0000
11+
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
12+
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
13+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
14+
; CHECK-NEXT: ret i32 [[TMP4]]
15+
;
16+
entry:
17+
%elt.sign = call i32 @llvm.dx.sign.f16(half %a)
18+
ret i32 %elt.sign
19+
}
20+
21+
; Scalar float operand: the llvm.dx.sign.f32 call should become two
; `fcmp olt` against zero, a zext of each to i32, and a sub.
define noundef i32 @sign_float(float noundef %a) {
22+
; CHECK-LABEL: define noundef i32 @sign_float(
23+
; CHECK-SAME: float noundef [[A:%.*]]) {
24+
; CHECK-NEXT: [[ENTRY:.*:]]
25+
; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt float 0.000000e+00, [[A]]
26+
; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt float [[A]], 0.000000e+00
27+
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
28+
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
29+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
30+
; CHECK-NEXT: ret i32 [[TMP4]]
31+
;
32+
entry:
33+
%elt.sign = call i32 @llvm.dx.sign.f32(float %a)
34+
ret i32 %elt.sign
35+
}
36+
37+
; Scalar double operand: the llvm.dx.sign.f64 call should become two
; `fcmp olt` against zero, a zext of each to i32, and a sub.
define noundef i32 @sign_double(double noundef %a) {
38+
; CHECK-LABEL: define noundef i32 @sign_double(
39+
; CHECK-SAME: double noundef [[A:%.*]]) {
40+
; CHECK-NEXT: [[ENTRY:.*:]]
41+
; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt double 0.000000e+00, [[A]]
42+
; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A]], 0.000000e+00
43+
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
44+
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
45+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
46+
; CHECK-NEXT: ret i32 [[TMP4]]
47+
;
48+
entry:
49+
%elt.sign = call i32 @llvm.dx.sign.f64(double %a)
50+
ret i32 %elt.sign
51+
}
52+
53+
; Scalar i16 operand: the llvm.dx.sign.i16 call should become two signed
; `icmp slt` against zero, a zext of each to i32, and a sub.
define noundef i32 @sign_i16(i16 noundef %a) {
54+
; CHECK-LABEL: define noundef i32 @sign_i16(
55+
; CHECK-SAME: i16 noundef [[A:%.*]]) {
56+
; CHECK-NEXT: [[ENTRY:.*:]]
57+
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i16 0, [[A]]
58+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[A]], 0
59+
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
60+
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
61+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
62+
; CHECK-NEXT: ret i32 [[TMP4]]
63+
;
64+
entry:
65+
%elt.sign = call i32 @llvm.dx.sign.i16(i16 %a)
66+
ret i32 %elt.sign
67+
}
68+
69+
; Scalar i32 operand: the llvm.dx.sign.i32 call should become two signed
; `icmp slt` against zero, a zext of each to i32, and a sub.
define noundef i32 @sign_i32(i32 noundef %a) {
70+
; CHECK-LABEL: define noundef i32 @sign_i32(
71+
; CHECK-SAME: i32 noundef [[A:%.*]]) {
72+
; CHECK-NEXT: [[ENTRY:.*:]]
73+
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 0, [[A]]
74+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A]], 0
75+
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
76+
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
77+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
78+
; CHECK-NEXT: ret i32 [[TMP4]]
79+
;
80+
entry:
81+
%elt.sign = call i32 @llvm.dx.sign.i32(i32 %a)
82+
ret i32 %elt.sign
83+
}
84+
85+
; Scalar i64 operand: the compares are done at i64 while the result is still
; i32 (two `icmp slt` against zero, zext of each to i32, then a sub).
define noundef i32 @sign_i64(i64 noundef %a) {
86+
; CHECK-LABEL: define noundef i32 @sign_i64(
87+
; CHECK-SAME: i64 noundef [[A:%.*]]) {
88+
; CHECK-NEXT: [[ENTRY:.*:]]
89+
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 0, [[A]]
90+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[A]], 0
91+
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
92+
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
93+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
94+
; CHECK-NEXT: ret i32 [[TMP4]]
95+
;
96+
entry:
97+
%elt.sign = call i32 @llvm.dx.sign.i64(i64 %a)
98+
ret i32 %elt.sign
99+
}
100+
101+
; <4 x half> operand: the expansion is the element-wise form of the scalar one,
; comparing against zeroinitializer and producing <4 x i32>.
define noundef <4 x i32> @sign_half_vector(<4 x half> noundef %a) {
102+
; CHECK-LABEL: define noundef <4 x i32> @sign_half_vector(
103+
; CHECK-SAME: <4 x half> noundef [[A:%.*]]) {
104+
; CHECK-NEXT: [[ENTRY:.*:]]
105+
; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x half> zeroinitializer, [[A]]
106+
; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x half> [[A]], zeroinitializer
107+
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
108+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
109+
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
110+
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
111+
;
112+
entry:
113+
%elt.sign = call <4 x i32> @llvm.dx.sign.v4f16(<4 x half> %a)
114+
ret <4 x i32> %elt.sign
115+
}
116+
117+
; <4 x float> operand: the expansion is the element-wise form of the scalar
; one, comparing against zeroinitializer and producing <4 x i32>.
define noundef <4 x i32> @sign_float_vector(<4 x float> noundef %a) {
118+
; CHECK-LABEL: define noundef <4 x i32> @sign_float_vector(
119+
; CHECK-SAME: <4 x float> noundef [[A:%.*]]) {
120+
; CHECK-NEXT: [[ENTRY:.*:]]
121+
; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> zeroinitializer, [[A]]
122+
; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x float> [[A]], zeroinitializer
123+
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
124+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
125+
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
126+
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
127+
;
128+
entry:
129+
%elt.sign = call <4 x i32> @llvm.dx.sign.v4f32(<4 x float> %a)
130+
ret <4 x i32> %elt.sign
131+
}
132+
133+
; <4 x double> operand: the expansion is the element-wise form of the scalar
; one, comparing against zeroinitializer and producing <4 x i32>.
define noundef <4 x i32> @sign_double_vector(<4 x double> noundef %a) {
134+
; CHECK-LABEL: define noundef <4 x i32> @sign_double_vector(
135+
; CHECK-SAME: <4 x double> noundef [[A:%.*]]) {
136+
; CHECK-NEXT: [[ENTRY:.*:]]
137+
; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x double> zeroinitializer, [[A]]
138+
; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x double> [[A]], zeroinitializer
139+
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
140+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
141+
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
142+
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
143+
;
144+
entry:
145+
%elt.sign = call <4 x i32> @llvm.dx.sign.v4f64(<4 x double> %a)
146+
ret <4 x i32> %elt.sign
147+
}
148+
149+
; <4 x i16> operand: element-wise signed `icmp slt` against zeroinitializer,
; zext of each mask to <4 x i32>, then a sub.
define noundef <4 x i32> @sign_i16_vector(<4 x i16> noundef %a) {
150+
; CHECK-LABEL: define noundef <4 x i32> @sign_i16_vector(
151+
; CHECK-SAME: <4 x i16> noundef [[A:%.*]]) {
152+
; CHECK-NEXT: [[ENTRY:.*:]]
153+
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i16> zeroinitializer, [[A]]
154+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i16> [[A]], zeroinitializer
155+
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
156+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
157+
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
158+
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
159+
;
160+
entry:
161+
%elt.sign = call <4 x i32> @llvm.dx.sign.v4i16(<4 x i16> %a)
162+
ret <4 x i32> %elt.sign
163+
}
164+
165+
; <4 x i32> operand: element-wise signed `icmp slt` against zeroinitializer,
; zext of each mask to <4 x i32>, then a sub.
define noundef <4 x i32> @sign_i32_vector(<4 x i32> noundef %a) {
166+
; CHECK-LABEL: define noundef <4 x i32> @sign_i32_vector(
167+
; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) {
168+
; CHECK-NEXT: [[ENTRY:.*:]]
169+
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> zeroinitializer, [[A]]
170+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[A]], zeroinitializer
171+
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
172+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
173+
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
174+
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
175+
;
176+
entry:
177+
%elt.sign = call <4 x i32> @llvm.dx.sign.v4i32(<4 x i32> %a)
178+
ret <4 x i32> %elt.sign
179+
}
180+
181+
; <4 x i64> operand: the compares are done at <4 x i64> while the result is
; still <4 x i32> (element-wise `icmp slt`, zext of each mask, then a sub).
define noundef <4 x i32> @sign_i64_vector(<4 x i64> noundef %a) {
182+
; CHECK-LABEL: define noundef <4 x i32> @sign_i64_vector(
183+
; CHECK-SAME: <4 x i64> noundef [[A:%.*]]) {
184+
; CHECK-NEXT: [[ENTRY:.*:]]
185+
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i64> zeroinitializer, [[A]]
186+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i64> [[A]], zeroinitializer
187+
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
188+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
189+
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
190+
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
191+
;
192+
entry:
193+
%elt.sign = call <4 x i32> @llvm.dx.sign.v4i64(<4 x i64> %a)
194+
ret <4 x i32> %elt.sign
195+
}
196+
197+
198+
declare i32 @llvm.dx.sign.f16(half)
199+
declare i32 @llvm.dx.sign.f32(float)
200+
declare i32 @llvm.dx.sign.f64(double)
201+
202+
declare i32 @llvm.dx.sign.i16(i16)
203+
declare i32 @llvm.dx.sign.i32(i32)
204+
declare i32 @llvm.dx.sign.i64(i64)
205+
206+
declare <4 x i32> @llvm.dx.sign.v4f16(<4 x half>)
207+
declare <4 x i32> @llvm.dx.sign.v4f32(<4 x float>)
208+
declare <4 x i32> @llvm.dx.sign.v4f64(<4 x double>)
209+
210+
declare <4 x i32> @llvm.dx.sign.v4i16(<4 x i16>)
211+
declare <4 x i32> @llvm.dx.sign.v4i32(<4 x i32>)
212+
declare <4 x i32> @llvm.dx.sign.v4i64(<4 x i64>)

0 commit comments

Comments
 (0)