Skip to content

Commit 4f48abf

Browse files
V-FEXrt and s-perron authored
[HLSL] Implement elementwise firstbitlow builtin (#116858)
Closes #99116. Implements `firstbitlow` by extracting common functionality from `firstbithigh` into a shared function while also fixing a bug for an edge case where `u64x3` and larger vectors will attempt to create vectors larger than the SPIR-V max of 4. --------- Co-authored-by: Steven Perron <[email protected]>
1 parent 4a4a8a1 commit 4f48abf

File tree

17 files changed

+1021
-134
lines changed

17 files changed

+1021
-134
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4855,6 +4855,12 @@ def HLSLFirstBitHigh : LangBuiltin<"HLSL_LANG"> {
48554855
let Prototype = "void(...)";
48564856
}
48574857

4858+
def HLSLFirstBitLow : LangBuiltin<"HLSL_LANG"> {
4859+
let Spellings = ["__builtin_hlsl_elementwise_firstbitlow"];
4860+
let Attributes = [NoThrow, Const];
4861+
let Prototype = "void(...)";
4862+
}
4863+
48584864
def HLSLFrac : LangBuiltin<"HLSL_LANG"> {
48594865
let Spellings = ["__builtin_hlsl_elementwise_frac"];
48604866
let Attributes = [NoThrow, Const];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19316,14 +19316,21 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
1931619316
"hlsl.dot4add.u8packed");
1931719317
}
1931819318
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
19319-
1932019319
Value *X = EmitScalarExpr(E->getArg(0));
1932119320

1932219321
return Builder.CreateIntrinsic(
1932319322
/*ReturnType=*/ConvertType(E->getType()),
1932419323
getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
1932519324
ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
1932619325
}
19326+
case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
19327+
Value *X = EmitScalarExpr(E->getArg(0));
19328+
19329+
return Builder.CreateIntrinsic(
19330+
/*ReturnType=*/ConvertType(E->getType()),
19331+
CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
19332+
nullptr, "hlsl.firstbitlow");
19333+
}
1932719334
case Builtin::BI__builtin_hlsl_lerp: {
1932819335
Value *X = EmitScalarExpr(E->getArg(0));
1932919336
Value *Y = EmitScalarExpr(E->getArg(1));

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ class CGHLSLRuntime {
9999
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
100100
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
101101
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
102+
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitLow, firstbitlow)
102103
GENERATE_HLSL_INTRINSIC_FUNCTION(NClamp, nclamp)
103104
GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp)
104105
GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp)

clang/lib/Headers/hlsl/hlsl_intrinsics.h

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,6 +1150,78 @@ uint3 firstbithigh(uint64_t3);
11501150
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
11511151
uint4 firstbithigh(uint64_t4);
11521152

1153+
//===----------------------------------------------------------------------===//
1154+
// firstbitlow builtins
1155+
//===----------------------------------------------------------------------===//
1156+
1157+
/// \fn T firstbitlow(T Val)
1158+
/// \brief Returns the location of the first set bit starting from the lowest
1159+
/// order bit and working upward, per component.
1160+
/// \param Val the input value.
1161+
1162+
#ifdef __HLSL_ENABLE_16_BIT
1163+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1164+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1165+
uint firstbitlow(int16_t);
1166+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1167+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1168+
uint2 firstbitlow(int16_t2);
1169+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1170+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1171+
uint3 firstbitlow(int16_t3);
1172+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1173+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1174+
uint4 firstbitlow(int16_t4);
1175+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1176+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1177+
uint firstbitlow(uint16_t);
1178+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1179+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1180+
uint2 firstbitlow(uint16_t2);
1181+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1182+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1183+
uint3 firstbitlow(uint16_t3);
1184+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1185+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1186+
uint4 firstbitlow(uint16_t4);
1187+
#endif
1188+
1189+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1190+
uint firstbitlow(int);
1191+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1192+
uint2 firstbitlow(int2);
1193+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1194+
uint3 firstbitlow(int3);
1195+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1196+
uint4 firstbitlow(int4);
1197+
1198+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1199+
uint firstbitlow(uint);
1200+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1201+
uint2 firstbitlow(uint2);
1202+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1203+
uint3 firstbitlow(uint3);
1204+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1205+
uint4 firstbitlow(uint4);
1206+
1207+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1208+
uint firstbitlow(int64_t);
1209+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1210+
uint2 firstbitlow(int64_t2);
1211+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1212+
uint3 firstbitlow(int64_t3);
1213+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1214+
uint4 firstbitlow(int64_t4);
1215+
1216+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1217+
uint firstbitlow(uint64_t);
1218+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1219+
uint2 firstbitlow(uint64_t2);
1220+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1221+
uint3 firstbitlow(uint64_t3);
1222+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1223+
uint4 firstbitlow(uint64_t4);
1224+
11531225
//===----------------------------------------------------------------------===//
11541226
// floor builtins
11551227
//===----------------------------------------------------------------------===//

clang/lib/Sema/SemaHLSL.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2036,7 +2036,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
20362036
return true;
20372037
break;
20382038
}
2039-
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
2039+
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh:
2040+
case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
20402041
if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall))
20412042
return true;
20422043

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
2+
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
3+
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx
4+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
5+
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
6+
// RUN: -emit-llvm -disable-llvm-passes \
7+
// RUN: -o - | FileCheck %s -DTARGET=spv
8+
9+
#ifdef __HLSL_ENABLE_16_BIT
10+
// CHECK-LABEL: test_firstbitlow_ushort
11+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
12+
uint test_firstbitlow_ushort(uint16_t p0) {
13+
return firstbitlow(p0);
14+
}
15+
16+
// CHECK-LABEL: test_firstbitlow_ushort2
17+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
18+
uint2 test_firstbitlow_ushort2(uint16_t2 p0) {
19+
return firstbitlow(p0);
20+
}
21+
22+
// CHECK-LABEL: test_firstbitlow_ushort3
23+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
24+
uint3 test_firstbitlow_ushort3(uint16_t3 p0) {
25+
return firstbitlow(p0);
26+
}
27+
28+
// CHECK-LABEL: test_firstbitlow_ushort4
29+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
30+
uint4 test_firstbitlow_ushort4(uint16_t4 p0) {
31+
return firstbitlow(p0);
32+
}
33+
34+
// CHECK-LABEL: test_firstbitlow_short
35+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
36+
uint test_firstbitlow_short(int16_t p0) {
37+
return firstbitlow(p0);
38+
}
39+
40+
// CHECK-LABEL: test_firstbitlow_short2
41+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
42+
uint2 test_firstbitlow_short2(int16_t2 p0) {
43+
return firstbitlow(p0);
44+
}
45+
46+
// CHECK-LABEL: test_firstbitlow_short3
47+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
48+
uint3 test_firstbitlow_short3(int16_t3 p0) {
49+
return firstbitlow(p0);
50+
}
51+
52+
// CHECK-LABEL: test_firstbitlow_short4
53+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
54+
uint4 test_firstbitlow_short4(int16_t4 p0) {
55+
return firstbitlow(p0);
56+
}
57+
#endif // __HLSL_ENABLE_16_BIT
58+
59+
// CHECK-LABEL: test_firstbitlow_uint
60+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
61+
uint test_firstbitlow_uint(uint p0) {
62+
return firstbitlow(p0);
63+
}
64+
65+
// CHECK-LABEL: test_firstbitlow_uint2
66+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
67+
uint2 test_firstbitlow_uint2(uint2 p0) {
68+
return firstbitlow(p0);
69+
}
70+
71+
// CHECK-LABEL: test_firstbitlow_uint3
72+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
73+
uint3 test_firstbitlow_uint3(uint3 p0) {
74+
return firstbitlow(p0);
75+
}
76+
77+
// CHECK-LABEL: test_firstbitlow_uint4
78+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
79+
uint4 test_firstbitlow_uint4(uint4 p0) {
80+
return firstbitlow(p0);
81+
}
82+
83+
// CHECK-LABEL: test_firstbitlow_ulong
84+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
85+
uint test_firstbitlow_ulong(uint64_t p0) {
86+
return firstbitlow(p0);
87+
}
88+
89+
// CHECK-LABEL: test_firstbitlow_ulong2
90+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
91+
uint2 test_firstbitlow_ulong2(uint64_t2 p0) {
92+
return firstbitlow(p0);
93+
}
94+
95+
// CHECK-LABEL: test_firstbitlow_ulong3
96+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
97+
uint3 test_firstbitlow_ulong3(uint64_t3 p0) {
98+
return firstbitlow(p0);
99+
}
100+
101+
// CHECK-LABEL: test_firstbitlow_ulong4
102+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
103+
uint4 test_firstbitlow_ulong4(uint64_t4 p0) {
104+
return firstbitlow(p0);
105+
}
106+
107+
// CHECK-LABEL: test_firstbitlow_int
108+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
109+
uint test_firstbitlow_int(int p0) {
110+
return firstbitlow(p0);
111+
}
112+
113+
// CHECK-LABEL: test_firstbitlow_int2
114+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
115+
uint2 test_firstbitlow_int2(int2 p0) {
116+
return firstbitlow(p0);
117+
}
118+
119+
// CHECK-LABEL: test_firstbitlow_int3
120+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
121+
uint3 test_firstbitlow_int3(int3 p0) {
122+
return firstbitlow(p0);
123+
}
124+
125+
// CHECK-LABEL: test_firstbitlow_int4
126+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
127+
uint4 test_firstbitlow_int4(int4 p0) {
128+
return firstbitlow(p0);
129+
}
130+
131+
// CHECK-LABEL: test_firstbitlow_long
132+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
133+
uint test_firstbitlow_long(int64_t p0) {
134+
return firstbitlow(p0);
135+
}
136+
137+
// CHECK-LABEL: test_firstbitlow_long2
138+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
139+
uint2 test_firstbitlow_long2(int64_t2 p0) {
140+
return firstbitlow(p0);
141+
}
142+
143+
// CHECK-LABEL: test_firstbitlow_long3
144+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
145+
uint3 test_firstbitlow_long3(int64_t3 p0) {
146+
return firstbitlow(p0);
147+
}
148+
149+
// CHECK-LABEL: test_firstbitlow_long4
150+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
151+
uint4 test_firstbitlow_long4(int64_t4 p0) {
152+
return firstbitlow(p0);
153+
}

clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,10 @@ double test_int_builtin(double p0) {
1717

1818
double2 test_int_builtin_2(double2 p0) {
1919
return __builtin_hlsl_elementwise_firstbithigh(p0);
20-
// expected-error@-1 {{1st argument must be a vector of integers
21-
// (was 'double2' (aka 'vector<double, 2>'))}}
20+
// expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
2221
}
2322

2423
float test_int_builtin_3(float p0) {
2524
return __builtin_hlsl_elementwise_firstbithigh(p0);
26-
// expected-error@-1 {{1st argument must be a vector of integers
27-
// (was 'float')}}
25+
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
2826
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected
2+
3+
int test_too_few_arg() {
4+
return firstbitlow();
5+
// expected-error@-1 {{no matching function for call to 'firstbitlow'}}
6+
}
7+
8+
int test_too_many_arg(int p0) {
9+
return firstbitlow(p0, p0);
10+
// expected-error@-1 {{no matching function for call to 'firstbitlow'}}
11+
}
12+
13+
double test_int_builtin(double p0) {
14+
return firstbitlow(p0);
15+
// expected-error@-1 {{call to 'firstbitlow' is ambiguous}}
16+
}
17+
18+
double2 test_int_builtin_2(double2 p0) {
19+
return __builtin_hlsl_elementwise_firstbitlow(p0);
20+
// expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
21+
}
22+
23+
float test_int_builtin_3(float p0) {
24+
return __builtin_hlsl_elementwise_firstbitlow(p0);
25+
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
26+
}

llvm/include/llvm/IR/IntrinsicsDirectX.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>
115115
def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
116116
def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
117117
def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
118+
def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
118119

119120
def int_dx_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
120121
}

llvm/include/llvm/IR/IntrinsicsSPIRV.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ let TargetPrefix = "spv" in {
113113

114114
def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
115115
def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
116+
def int_spv_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
116117

117118
def int_spv_resource_updatecounter
118119
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty],

llvm/lib/Target/DirectX/DXIL.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,18 @@ def CountBits : DXILOp<31, unaryBits> {
620620
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
621621
}
622622

623+
def FirstbitLo : DXILOp<32, unaryBits> {
624+
let Doc = "Returns the location of the first set bit starting from "
625+
"the lowest order bit and working upward.";
626+
let intrinsics = [ IntrinSelect<int_dx_firstbitlow> ];
627+
let arguments = [OverloadTy];
628+
let result = Int32Ty;
629+
let overloads =
630+
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
631+
let stages = [Stages<DXIL1_0, [all_stages]>];
632+
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
633+
}
634+
623635
def FirstbitHi : DXILOp<33, unaryBits> {
624636
let Doc = "Returns the location of the first set bit starting from "
625637
"the highest order bit and working downward.";

llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
4545
case Intrinsic::dx_splitdouble:
4646
case Intrinsic::dx_firstbituhigh:
4747
case Intrinsic::dx_firstbitshigh:
48+
case Intrinsic::dx_firstbitlow:
4849
return true;
4950
default:
5051
return false;

0 commit comments

Comments
 (0)