Skip to content

Commit ea72bbb

Browse files
committed
[DXIL] Add sign intrinsic part 2
1 parent 115b876 commit ea72bbb

File tree

3 files changed

+248
-0
lines changed

3 files changed

+248
-0
lines changed

llvm/include/llvm/IR/IntrinsicsDirectX.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,5 @@ def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLV
7979
def int_dx_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
8080
def int_dx_rcp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
8181
def int_dx_rsqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
82+
// Sign intrinsic: overloaded on its single operand, which may be of any type.
// The result is i32 for a scalar operand, or a vector of i32 with the same
// number of elements as the operand when the operand is a vector.
def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty]>;
8283
}

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/ADT/SmallVector.h"
1717
#include "llvm/CodeGen/Passes.h"
1818
#include "llvm/IR/IRBuilder.h"
19+
#include "llvm/IR/InstrTypes.h"
1920
#include "llvm/IR/Instruction.h"
2021
#include "llvm/IR/Instructions.h"
2122
#include "llvm/IR/Intrinsics.h"
@@ -48,6 +49,7 @@ static bool isIntrinsicExpansion(Function &F) {
4849
case Intrinsic::dx_fdot:
4950
case Intrinsic::dx_sdot:
5051
case Intrinsic::dx_udot:
52+
case Intrinsic::dx_sign:
5153
return true;
5254
}
5355
return false;
@@ -359,6 +361,32 @@ static Value *expandClampIntrinsic(CallInst *Orig,
359361
{MaxCall, Max}, nullptr, "dx.min");
360362
}
361363

364+
/// Expand a call to the dx.sign intrinsic into plain compare/extend/subtract
/// instructions inserted immediately before \p Orig.
///
/// The emitted sequence is `zext(0 < x) - zext(x < 0)`, evaluated in the
/// call's own result type, which yields 1, 0 or -1 per element. Floating-point
/// operands use the ordered less-than predicate; every other operand is
/// expected to be an integer and is compared as signed.
///
/// \param Orig The intrinsic call being expanded; its first operand is the
///             value whose sign is computed.
/// \returns The value produced by the final subtraction.
static Value *expandSignIntrinsic(CallInst *Orig) {
  Value *Input = Orig->getOperand(0);
  Type *InputTy = Input->getType();
  Type *ScalarTy = InputTy->getScalarType();
  // Zero of the full operand type, so vector operands compare element-wise.
  Constant *Zero = Constant::getNullValue(InputTy);

  // Choose the "less than" flavour once; both comparisons share it.
  CmpInst::Predicate LessThan = CmpInst::FCMP_OLT;
  if (!ScalarTy->isFloatingPointTy()) {
    assert(ScalarTy->isIntegerTy());
    LessThan = CmpInst::ICMP_SLT;
  }

  // New instructions are placed directly in front of the original call.
  IRBuilder<> Builder(Orig);
  Value *IsPositive = Builder.CreateCmp(LessThan, Zero, Input);
  Value *IsNegative = Builder.CreateCmp(LessThan, Input, Zero);

  // Widen both i1 results to the call's result type before subtracting.
  Type *ResultTy = Orig->getType();
  Value *PositiveBit = Builder.CreateZExt(IsPositive, ResultTy);
  Value *NegativeBit = Builder.CreateZExt(IsNegative, ResultTy);
  return Builder.CreateSub(PositiveBit, NegativeBit);
}
389+
362390
static bool expandIntrinsic(Function &F, CallInst *Orig) {
363391
Value *Result = nullptr;
364392
Intrinsic::ID IntrinsicId = F.getIntrinsicID();
@@ -402,6 +430,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
402430
case Intrinsic::dx_udot:
403431
Result = expandIntegerDotIntrinsic(Orig, IntrinsicId);
404432
break;
433+
case Intrinsic::dx_sign:
434+
Result = expandSignIntrinsic(Orig);
435+
break;
405436
}
406437

407438
if (Result) {

llvm/test/CodeGen/DirectX/sign.ll

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
3+
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
4+
5+
6+
; Scalar half operand: the autogenerated assertions expect the call to
; @llvm.dx.sign.f16 to become two `fcmp olt` comparisons against zero, each
; zero-extended to i32, followed by a subtraction whose result is returned.
define noundef i32 @sign_half(half noundef %a) {
7+
; CHECK-LABEL: define noundef i32 @sign_half(
8+
; CHECK-SAME: half noundef [[A:%.*]]) {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt half 0xH0000, [[A]]
11+
; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt half [[A]], 0xH0000
12+
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
13+
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
14+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
15+
; CHECK-NEXT: ret i32 [[TMP4]]
16+
;
17+
entry:
18+
%elt.sign = call i32 @llvm.dx.sign.f16(half %a)
19+
ret i32 %elt.sign
20+
}
21+
22+
; Scalar float operand: the autogenerated assertions expect the call to
; @llvm.dx.sign.f32 to become two `fcmp olt` comparisons against 0.0, each
; zero-extended to i32, followed by a subtraction whose result is returned.
define noundef i32 @sign_float(float noundef %a) {
23+
; CHECK-LABEL: define noundef i32 @sign_float(
24+
; CHECK-SAME: float noundef [[A:%.*]]) {
25+
; CHECK-NEXT: [[ENTRY:.*:]]
26+
; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt float 0.000000e+00, [[A]]
27+
; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt float [[A]], 0.000000e+00
28+
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
29+
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
30+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
31+
; CHECK-NEXT: ret i32 [[TMP4]]
32+
;
33+
entry:
34+
%elt.sign = call i32 @llvm.dx.sign.f32(float %a)
35+
ret i32 %elt.sign
36+
}
37+
38+
; Scalar double operand: the autogenerated assertions expect the call to
; @llvm.dx.sign.f64 to become two `fcmp olt` comparisons against 0.0, each
; zero-extended to i32, followed by a subtraction whose result is returned.
define noundef i32 @sign_double(double noundef %a) {
39+
; CHECK-LABEL: define noundef i32 @sign_double(
40+
; CHECK-SAME: double noundef [[A:%.*]]) {
41+
; CHECK-NEXT: [[ENTRY:.*:]]
42+
; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt double 0.000000e+00, [[A]]
43+
; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A]], 0.000000e+00
44+
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
45+
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
46+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
47+
; CHECK-NEXT: ret i32 [[TMP4]]
48+
;
49+
entry:
50+
%elt.sign = call i32 @llvm.dx.sign.f64(double %a)
51+
ret i32 %elt.sign
52+
}
53+
54+
; Scalar i16 operand: the autogenerated assertions expect the call to
; @llvm.dx.sign.i16 to become two signed `icmp slt` comparisons against 0, each
; zero-extended to i32, followed by a subtraction whose result is returned.
define noundef i32 @sign_i16(i16 noundef %a) {
55+
; CHECK-LABEL: define noundef i32 @sign_i16(
56+
; CHECK-SAME: i16 noundef [[A:%.*]]) {
57+
; CHECK-NEXT: [[ENTRY:.*:]]
58+
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i16 0, [[A]]
59+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[A]], 0
60+
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
61+
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
62+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
63+
; CHECK-NEXT: ret i32 [[TMP4]]
64+
;
65+
entry:
66+
%elt.sign = call i32 @llvm.dx.sign.i16(i16 %a)
67+
ret i32 %elt.sign
68+
}
69+
70+
; Scalar i32 operand: the autogenerated assertions expect the call to
; @llvm.dx.sign.i32 to become two signed `icmp slt` comparisons against 0, each
; zero-extended to i32, followed by a subtraction whose result is returned.
define noundef i32 @sign_i32(i32 noundef %a) {
71+
; CHECK-LABEL: define noundef i32 @sign_i32(
72+
; CHECK-SAME: i32 noundef [[A:%.*]]) {
73+
; CHECK-NEXT: [[ENTRY:.*:]]
74+
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 0, [[A]]
75+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A]], 0
76+
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
77+
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
78+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
79+
; CHECK-NEXT: ret i32 [[TMP4]]
80+
;
81+
entry:
82+
%elt.sign = call i32 @llvm.dx.sign.i32(i32 %a)
83+
ret i32 %elt.sign
84+
}
85+
86+
; Scalar i64 operand: the autogenerated assertions expect the call to
; @llvm.dx.sign.i64 to become two signed `icmp slt` comparisons against 0, each
; zero-extended to i32, followed by a subtraction whose result is returned.
define noundef i32 @sign_i64(i64 noundef %a) {
87+
; CHECK-LABEL: define noundef i32 @sign_i64(
88+
; CHECK-SAME: i64 noundef [[A:%.*]]) {
89+
; CHECK-NEXT: [[ENTRY:.*:]]
90+
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 0, [[A]]
91+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[A]], 0
92+
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
93+
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
94+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
95+
; CHECK-NEXT: ret i32 [[TMP4]]
96+
;
97+
entry:
98+
%elt.sign = call i32 @llvm.dx.sign.i64(i64 %a)
99+
ret i32 %elt.sign
100+
}
101+
102+
; <4 x half> operand: the autogenerated assertions expect the call to
; @llvm.dx.sign.v4f16 to become two vector `fcmp olt` comparisons against
; zeroinitializer, each zero-extended to <4 x i32>, followed by a subtraction.
define noundef <4 x i32> @sign_half_vector(<4 x half> noundef %a) {
103+
; CHECK-LABEL: define noundef <4 x i32> @sign_half_vector(
104+
; CHECK-SAME: <4 x half> noundef [[A:%.*]]) {
105+
; CHECK-NEXT: [[ENTRY:.*:]]
106+
; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x half> zeroinitializer, [[A]]
107+
; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x half> [[A]], zeroinitializer
108+
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
109+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
110+
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
111+
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
112+
;
113+
entry:
114+
%elt.sign = call <4 x i32> @llvm.dx.sign.v4f16(<4 x half> %a)
115+
ret <4 x i32> %elt.sign
116+
}
117+
118+
; <4 x float> operand: the autogenerated assertions expect the call to
; @llvm.dx.sign.v4f32 to become two vector `fcmp olt` comparisons against
; zeroinitializer, each zero-extended to <4 x i32>, followed by a subtraction.
define noundef <4 x i32> @sign_float_vector(<4 x float> noundef %a) {
119+
; CHECK-LABEL: define noundef <4 x i32> @sign_float_vector(
120+
; CHECK-SAME: <4 x float> noundef [[A:%.*]]) {
121+
; CHECK-NEXT: [[ENTRY:.*:]]
122+
; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> zeroinitializer, [[A]]
123+
; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x float> [[A]], zeroinitializer
124+
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
125+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
126+
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
127+
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
128+
;
129+
entry:
130+
%elt.sign = call <4 x i32> @llvm.dx.sign.v4f32(<4 x float> %a)
131+
ret <4 x i32> %elt.sign
132+
}
133+
134+
; <4 x double> operand: the autogenerated assertions expect the call to
; @llvm.dx.sign.v4f64 to become two vector `fcmp olt` comparisons against
; zeroinitializer, each zero-extended to <4 x i32>, followed by a subtraction.
define noundef <4 x i32> @sign_double_vector(<4 x double> noundef %a) {
135+
; CHECK-LABEL: define noundef <4 x i32> @sign_double_vector(
136+
; CHECK-SAME: <4 x double> noundef [[A:%.*]]) {
137+
; CHECK-NEXT: [[ENTRY:.*:]]
138+
; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x double> zeroinitializer, [[A]]
139+
; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x double> [[A]], zeroinitializer
140+
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
141+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
142+
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
143+
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
144+
;
145+
entry:
146+
%elt.sign = call <4 x i32> @llvm.dx.sign.v4f64(<4 x double> %a)
147+
ret <4 x i32> %elt.sign
148+
}
149+
150+
; <4 x i16> operand: the autogenerated assertions expect the call to
; @llvm.dx.sign.v4i16 to become two vector signed `icmp slt` comparisons against
; zeroinitializer, each zero-extended to <4 x i32>, followed by a subtraction.
define noundef <4 x i32> @sign_i16_vector(<4 x i16> noundef %a) {
151+
; CHECK-LABEL: define noundef <4 x i32> @sign_i16_vector(
152+
; CHECK-SAME: <4 x i16> noundef [[A:%.*]]) {
153+
; CHECK-NEXT: [[ENTRY:.*:]]
154+
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i16> zeroinitializer, [[A]]
155+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i16> [[A]], zeroinitializer
156+
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
157+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
158+
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
159+
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
160+
;
161+
entry:
162+
%elt.sign = call <4 x i32> @llvm.dx.sign.v4i16(<4 x i16> %a)
163+
ret <4 x i32> %elt.sign
164+
}
165+
166+
; <4 x i32> operand: the autogenerated assertions expect the call to
; @llvm.dx.sign.v4i32 to become two vector signed `icmp slt` comparisons against
; zeroinitializer, each zero-extended to <4 x i32>, followed by a subtraction.
define noundef <4 x i32> @sign_i32_vector(<4 x i32> noundef %a) {
167+
; CHECK-LABEL: define noundef <4 x i32> @sign_i32_vector(
168+
; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) {
169+
; CHECK-NEXT: [[ENTRY:.*:]]
170+
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> zeroinitializer, [[A]]
171+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[A]], zeroinitializer
172+
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
173+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
174+
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
175+
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
176+
;
177+
entry:
178+
%elt.sign = call <4 x i32> @llvm.dx.sign.v4i32(<4 x i32> %a)
179+
ret <4 x i32> %elt.sign
180+
}
181+
182+
; <4 x i64> operand: the autogenerated assertions expect the call to
; @llvm.dx.sign.v4i64 to become two vector signed `icmp slt` comparisons against
; zeroinitializer, each zero-extended to <4 x i32>, followed by a subtraction.
define noundef <4 x i32> @sign_i64_vector(<4 x i64> noundef %a) {
183+
; CHECK-LABEL: define noundef <4 x i32> @sign_i64_vector(
184+
; CHECK-SAME: <4 x i64> noundef [[A:%.*]]) {
185+
; CHECK-NEXT: [[ENTRY:.*:]]
186+
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i64> zeroinitializer, [[A]]
187+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i64> [[A]], zeroinitializer
188+
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
189+
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
190+
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
191+
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
192+
;
193+
entry:
194+
%elt.sign = call <4 x i32> @llvm.dx.sign.v4i64(<4 x i64> %a)
195+
ret <4 x i32> %elt.sign
196+
}
197+
198+
199+
; Declarations for every llvm.dx.sign overload called in this file: scalar and
; 4-element vector forms for half/float/double and i16/i32/i64 operands. Scalar
; forms return i32 and vector forms return <4 x i32>.
declare i32 @llvm.dx.sign.f16(half)
200+
declare i32 @llvm.dx.sign.f32(float)
201+
declare i32 @llvm.dx.sign.f64(double)
202+
203+
declare i32 @llvm.dx.sign.i16(i16)
204+
declare i32 @llvm.dx.sign.i32(i32)
205+
declare i32 @llvm.dx.sign.i64(i64)
206+
207+
declare <4 x i32> @llvm.dx.sign.v4f16(<4 x half>)
208+
declare <4 x i32> @llvm.dx.sign.v4f32(<4 x float>)
209+
declare <4 x i32> @llvm.dx.sign.v4f64(<4 x double>)
210+
211+
declare <4 x i32> @llvm.dx.sign.v4i16(<4 x i16>)
212+
declare <4 x i32> @llvm.dx.sign.v4i32(<4 x i32>)
213+
declare <4 x i32> @llvm.dx.sign.v4i64(<4 x i64>)
214+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
215+
; DOPCHECK: {{.*}}
216+
; EXPCHECK: {{.*}}

0 commit comments

Comments
 (0)