Skip to content

[HLSL] Implement elementwise firstbitlow builtin #116858

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -4855,6 +4855,12 @@ def HLSLFirstBitHigh : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}

// HLSL-only Clang builtin spelled `__builtin_hlsl_elementwise_firstbitlow`.
// The prototype is the variadic "void(...)", so argument and result types are
// not fixed by this record. NOTE(review): the operand is presumably validated
// in SemaHLSL::CheckBuiltinFunctionCall — confirm.
def HLSLFirstBitLow : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_firstbitlow"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
}

def HLSLFrac : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_frac"];
let Attributes = [NoThrow, Const];
Expand Down
9 changes: 8 additions & 1 deletion clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19316,14 +19316,21 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
"hlsl.dot4add.u8packed");
}
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {

// Emit the single operand of the builtin call.
Value *X = EmitScalarExpr(E->getArg(0));

// The intrinsic is picked by getFirstBitHighIntrinsic from the argument's
// type; the result type is the call expression's own converted type.
return Builder.CreateIntrinsic(
/*ReturnType=*/ConvertType(E->getType()),
getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
}
case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
// Emit the single operand of the builtin call.
Value *X = EmitScalarExpr(E->getArg(0));

// Unlike the firstbithigh case above, the intrinsic is not selected from the
// argument's type: the HLSL runtime supplies one firstbitlow intrinsic. The
// result type is the call expression's own converted type.
return Builder.CreateIntrinsic(
/*ReturnType=*/ConvertType(E->getType()),
CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
nullptr, "hlsl.firstbitlow");
}
case Builtin::BI__builtin_hlsl_lerp: {
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGHLSLRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitLow, firstbitlow)
GENERATE_HLSL_INTRINSIC_FUNCTION(NClamp, nclamp)
GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp)
GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp)
Expand Down
72 changes: 72 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,78 @@ uint3 firstbithigh(uint64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint4 firstbithigh(uint64_t4);

//===----------------------------------------------------------------------===//
// firstbitlow builtins
//===----------------------------------------------------------------------===//

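/// \fn uint firstbitlow(T Val)
/// \brief Returns the location of the first set bit starting from the lowest
/// order bit and working upward, per component.
/// \param Val the input value.
/// \return A `uint` for scalar inputs, or a `uint` vector with the same number
/// of components as \a Val for vector inputs.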

#ifdef __HLSL_ENABLE_16_BIT
// 16-bit overloads: declared only when __HLSL_ENABLE_16_BIT is defined, and
// each one carries a shader model 6.2 availability attribute. Every overload
// aliases __builtin_hlsl_elementwise_firstbitlow and returns uint / uintN.

// Signed 16-bit scalar and 2/3/4-component vectors.
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint firstbitlow(int16_t);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint2 firstbitlow(int16_t2);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint3 firstbitlow(int16_t3);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint4 firstbitlow(int16_t4);
// Unsigned 16-bit scalar and 2/3/4-component vectors.
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint firstbitlow(uint16_t);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint2 firstbitlow(uint16_t2);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint3 firstbitlow(uint16_t3);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint4 firstbitlow(uint16_t4);
#endif

// The overloads below are declared unconditionally and carry no availability
// attribute. Each aliases __builtin_hlsl_elementwise_firstbitlow and returns
// uint / uintN regardless of the input element type.

// int scalar and 2/3/4-component vectors.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint firstbitlow(int);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint2 firstbitlow(int2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint3 firstbitlow(int3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint4 firstbitlow(int4);

// uint scalar and 2/3/4-component vectors.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint firstbitlow(uint);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint2 firstbitlow(uint2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint3 firstbitlow(uint3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint4 firstbitlow(uint4);

// int64_t scalar and 2/3/4-component vectors.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint firstbitlow(int64_t);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint2 firstbitlow(int64_t2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint3 firstbitlow(int64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint4 firstbitlow(int64_t4);

// uint64_t scalar and 2/3/4-component vectors.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint firstbitlow(uint64_t);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint2 firstbitlow(uint64_t2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint3 firstbitlow(uint64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint4 firstbitlow(uint64_t4);

//===----------------------------------------------------------------------===//
// floor builtins
//===----------------------------------------------------------------------===//
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Sema/SemaHLSL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2036,7 +2036,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh:
case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall))
return true;

Expand Down
153 changes: 153 additions & 0 deletions clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s -DTARGET=spv

// 16-bit cases, compiled only when __HLSL_ENABLE_16_BIT is defined. Signed and
// unsigned inputs are expected to reach the same i16 / vNi16 intrinsic
// overload, and the result element type is always i32.
#ifdef __HLSL_ENABLE_16_BIT
// CHECK-LABEL: test_firstbitlow_ushort
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
uint test_firstbitlow_ushort(uint16_t p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_ushort2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
uint2 test_firstbitlow_ushort2(uint16_t2 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_ushort3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
uint3 test_firstbitlow_ushort3(uint16_t3 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_ushort4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
uint4 test_firstbitlow_ushort4(uint16_t4 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_short
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
uint test_firstbitlow_short(int16_t p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_short2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
uint2 test_firstbitlow_short2(int16_t2 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_short3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
uint3 test_firstbitlow_short3(int16_t3 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_short4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
uint4 test_firstbitlow_short4(int16_t4 p0) {
return firstbitlow(p0);
}
#endif // __HLSL_ENABLE_16_BIT

// uint cases: the scalar and the 2/3/4-component vectors are expected to call
// the i32 / vNi32 overload of the target's firstbitlow intrinsic.
// CHECK-LABEL: test_firstbitlow_uint
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
uint test_firstbitlow_uint(uint p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_uint2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
uint2 test_firstbitlow_uint2(uint2 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_uint3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
uint3 test_firstbitlow_uint3(uint3 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_uint4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
uint4 test_firstbitlow_uint4(uint4 p0) {
return firstbitlow(p0);
}

// uint64_t cases: the operand overload is i64 / vNi64, but the expected result
// element type is still i32.
// CHECK-LABEL: test_firstbitlow_ulong
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
uint test_firstbitlow_ulong(uint64_t p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_ulong2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
uint2 test_firstbitlow_ulong2(uint64_t2 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_ulong3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
uint3 test_firstbitlow_ulong3(uint64_t3 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_ulong4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
uint4 test_firstbitlow_ulong4(uint64_t4 p0) {
return firstbitlow(p0);
}

// int cases: signed inputs are expected to call the same i32 / vNi32 overload
// as the uint cases.
// CHECK-LABEL: test_firstbitlow_int
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
uint test_firstbitlow_int(int p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_int2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
uint2 test_firstbitlow_int2(int2 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_int3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
uint3 test_firstbitlow_int3(int3 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_int4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
uint4 test_firstbitlow_int4(int4 p0) {
return firstbitlow(p0);
}

// int64_t cases: signed inputs are expected to call the same i64 / vNi64
// overload as the uint64_t cases, with an i32 result element type.
// CHECK-LABEL: test_firstbitlow_long
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
uint test_firstbitlow_long(int64_t p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_long2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
uint2 test_firstbitlow_long2(int64_t2 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_long3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
uint3 test_firstbitlow_long3(int64_t3 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_long4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
uint4 test_firstbitlow_long4(int64_t4 p0) {
return firstbitlow(p0);
}
6 changes: 2 additions & 4 deletions clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@ double test_int_builtin(double p0) {

double2 test_int_builtin_2(double2 p0) {
return __builtin_hlsl_elementwise_firstbithigh(p0);
// expected-error@-1 {{1st argument must be a vector of integers
// (was 'double2' (aka 'vector<double, 2>'))}}
// expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
}

float test_int_builtin_3(float p0) {
return __builtin_hlsl_elementwise_firstbithigh(p0);
// expected-error@-1 {{1st argument must be a vector of integers
// (was 'float')}}
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test framework doesn't actually assert these lines when they are split. I had to unsplit them, which raised a test failure that was then fixed.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know you can do a \ continuation for multiple expected directives, but I don't know if you can do that inside the match string. For example I know this works:

  <some_bad_code> // expected-error {{ first error }} \
                  // expected-error {{ second error }}

}
26 changes: 26 additions & 0 deletions clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected

// Calling the public firstbitlow overload set with no arguments must be
// rejected as having no matching function.
int test_too_few_arg() {
return firstbitlow();
// expected-error@-1 {{no matching function for call to 'firstbitlow'}}
}

// Calling the public firstbitlow overload set with two arguments must be
// rejected as having no matching function.
int test_too_many_arg(int p0) {
return firstbitlow(p0, p0);
// expected-error@-1 {{no matching function for call to 'firstbitlow'}}
}

// A double argument goes through the public overload set and is diagnosed as
// an ambiguous call rather than as having no match.
double test_int_builtin(double p0) {
return firstbitlow(p0);
// expected-error@-1 {{call to 'firstbitlow' is ambiguous}}
}

// Calls the raw builtin directly (bypassing the overload set) with a
// floating-point vector; the builtin's own argument check must reject it.
double2 test_int_builtin_2(double2 p0) {
return __builtin_hlsl_elementwise_firstbitlow(p0);
// expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
}

// Calls the raw builtin directly with a scalar float.
// NOTE(review): the diagnostic names 'double' even though the parameter is
// float — presumably the builtin's variadic prototype promotes the argument
// before the check runs; confirm. The directive must stay on a single line.
float test_int_builtin_3(float p0) {
return __builtin_hlsl_elementwise_firstbitlow(p0);
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
}
1 change: 1 addition & 0 deletions llvm/include/llvm/IR/IntrinsicsDirectX.td
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>
def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
// firstbitlow: overloaded on any integer operand type; the result is i32, or
// an i32 vector with the operand's vector width. Declared IntrNoMem.
def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;

def int_dx_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
}
1 change: 1 addition & 0 deletions llvm/include/llvm/IR/IntrinsicsSPIRV.td
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ let TargetPrefix = "spv" in {

def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
// firstbitlow: overloaded on any integer operand type; the result is i32, or
// an i32 vector with the operand's vector width. Declared IntrNoMem.
def int_spv_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;

def int_spv_resource_updatecounter
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty],
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/DirectX/DXIL.td
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,18 @@ def CountBits : DXILOp<31, unaryBits> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

// DXIL op 32 in the unaryBits class, selected from the int_dx_firstbitlow
// intrinsic. The operand is overloaded on 16-, 32- and 64-bit integers, while
// the result is always Int32Ty.
def FirstbitLo : DXILOp<32, unaryBits> {
let Doc = "Returns the location of the first set bit starting from "
"the lowest order bit and working upward.";
let intrinsics = [ IntrinSelect<int_dx_firstbitlow> ];
let arguments = [OverloadTy];
let result = Int32Ty;
let overloads =
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
let stages = [Stages<DXIL1_0, [all_stages]>];
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

def FirstbitHi : DXILOp<33, unaryBits> {
let Doc = "Returns the location of the first set bit starting from "
"the highest order bit and working downward.";
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
case Intrinsic::dx_splitdouble:
case Intrinsic::dx_firstbituhigh:
case Intrinsic::dx_firstbitshigh:
case Intrinsic::dx_firstbitlow:
return true;
default:
return false;
Expand Down
Loading
Loading