Skip to content

Add length builtins and length HLSL function to DirectX Backend #101256

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Aug 3, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/docs/LanguageExtensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,7 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
T __builtin_elementwise_cosh(T x) return the hyperbolic cosine of angle x in radians floating point types
T __builtin_elementwise_tanh(T x) return the hyperbolic tangent of angle x in radians floating point types
T __builtin_elementwise_floor(T x) return the largest integral value less than or equal to x floating point types
T __builtin_elementwise_length(T x) return the length of the specified floating-point vector floating point types
T __builtin_elementwise_log(T x) return the natural logarithm of x floating point types
T __builtin_elementwise_log2(T x) return the base 2 logarithm of x floating point types
T __builtin_elementwise_log10(T x) return the base 10 logarithm of x floating point types
Expand Down
1 change: 1 addition & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ DWARF Support in Clang

Floating Point Support in Clang
-------------------------------
- Add ``__builtin_elementwise_length``builtin for floating point types only.

Fixed Point Support in Clang
----------------------------
Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -4707,6 +4707,12 @@ def HLSLIsinf : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}

def HLSLLength : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_length"];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be "__builtin_hlsl_length", not "__builtin_hlsl_elementwise_length". The word "elementwise" means we do an operation to each element, whereas this is an operation on the whole vector.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with Justin, This shouldn't be an elementwise builtin see lerp, dot, and mad. For operations that work on vectors and generate scalar results we don't add the elementwise naming convention. Reserve elementwise for cases were the function takes multiple inputs applys the same operation to each input then returns the same number of outputs.

let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
}

def HLSLLerp : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_lerp"];
let Attributes = [NoThrow, Const];
Expand Down
16 changes: 16 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18460,6 +18460,22 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
/*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
}
case Builtin::BI__builtin_hlsl_elementwise_length: {
Value *X = EmitScalarExpr(E->getArg(0));

if (!E->getArg(0)->getType()->hasFloatingRepresentation())
llvm_unreachable("length operand must have a float representation");
// if the operand is a scalar, we can use the fabs llvm intrinsic directly
if (!E->getArg(0)->getType()->isVectorType()) {
llvm::Type *ResultType = ConvertType(E->getType());
Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
return Builder.CreateCall(F, X);
}
return Builder.CreateIntrinsic(
/*ReturnType=*/X->getType()->getScalarType(),
CGM.getHLSLRuntime().getLengthIntrinsic(),
ArrayRef<Value *>{X}, nullptr, "hlsl.length");
}
case Builtin::BI__builtin_hlsl_elementwise_frac: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
if (!E->getArg(0)->getType()->hasFloatingRepresentation())
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGHLSLRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(All, all)
GENERATE_HLSL_INTRINSIC_FUNCTION(Any, any)
GENERATE_HLSL_INTRINSIC_FUNCTION(Frac, frac)
GENERATE_HLSL_INTRINSIC_FUNCTION(Length, length)
GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
Expand Down
33 changes: 33 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,39 @@ float3 lerp(float3, float3, float3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp)
float4 lerp(float4, float4, float4);


//===----------------------------------------------------------------------===//
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR title looks like it may be misnamed, since this is also adding the HLSL builtin functions, it is doing more than just adding it to the backend.

// length builtins
//===----------------------------------------------------------------------===//

/// \fn T length(T x)
/// \brief Returns the length of the specified floating-point vector.
/// \param x [in] The vector of floats, or a scalar float.
///
/// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + …).

_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length)
half length(half);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length)
half length(half2);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length)
half length(half3);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length)
half length(half4);

_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length)
float length(float);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length)
float length(float2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length)
float length(float3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length)
float length(float4);

//===----------------------------------------------------------------------===//
// log builtins
//===----------------------------------------------------------------------===//
Expand Down
22 changes: 22 additions & 0 deletions clang/lib/Sema/SemaHLSL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1079,6 +1079,28 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
case Builtin::BI__builtin_hlsl_elementwise_length: {
if (SemaRef.checkArgCount(TheCall, 1))
return true;
if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall))
return true;

ExprResult A = TheCall->getArg(0);
QualType ArgTyA = A.get()->getType();
QualType RetTy;

if (auto *VTy = ArgTyA->getAs<VectorType>())
RetTy = VTy->getElementType();
else
RetTy = TheCall->getArg(0)->getType();

TheCall->setType(RetTy);


if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
return true;
break;
}
case Builtin::BI__builtin_hlsl_mad: {
if (SemaRef.checkArgCount(TheCall, 3))
return true;
Expand Down
73 changes: 73 additions & 0 deletions clang/test/CodeGenHLSL/builtins/length.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
// RUN: --check-prefixes=CHECK,NATIVE_HALF
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF

// NATIVE_HALF: define noundef half @
// NATIVE_HALF: call half @llvm.fabs.f16(half
// NO_HALF: call float @llvm.fabs.f32(float
// NATIVE_HALF: ret half
// NO_HALF: ret float
half test_length_half(half p0)
{
return length(p0);
}
// NATIVE_HALF: define noundef half @
// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v2f16
// NO_HALF: %hlsl.length = call float @llvm.dx.length.v2f32(
// NATIVE_HALF: ret half %hlsl.length
// NO_HALF: ret float %hlsl.length
half test_length_half2(half2 p0)
{
return length(p0);
}
// NATIVE_HALF: define noundef half @
// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v3f16
// NO_HALF: %hlsl.length = call float @llvm.dx.length.v3f32(
// NATIVE_HALF: ret half %hlsl.length
// NO_HALF: ret float %hlsl.length
half test_length_half3(half3 p0)
{
return length(p0);
}
// NATIVE_HALF: define noundef half @
// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v4f16
// NO_HALF: %hlsl.length = call float @llvm.dx.length.v4f32(
// NATIVE_HALF: ret half %hlsl.length
// NO_HALF: ret float %hlsl.length
half test_length_half4(half4 p0)
{
return length(p0);
}

// CHECK: define noundef float @
// CHECK: call float @llvm.fabs.f32(float
// CHECK: ret float
float test_length_float(float p0)
{
return length(p0);
}
// CHECK: define noundef float @
// CHECK: %hlsl.length = call float @llvm.dx.length.v2f32(
// CHECK: ret float %hlsl.length
float test_length_float2(float2 p0)
{
return length(p0);
}
// CHECK: define noundef float @
// CHECK: %hlsl.length = call float @llvm.dx.length.v3f32(
// CHECK: ret float %hlsl.length
float test_length_float3(float3 p0)
{
return length(p0);
}
// CHECK: define noundef float @
// CHECK: %hlsl.length = call float @llvm.dx.length.v4f32(
// CHECK: ret float %hlsl.length
float test_length_float4(float4 p0)
{
return length(p0);
}
31 changes: 31 additions & 0 deletions clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected

bool test_too_few_arg()
{
return __builtin_hlsl_elementwise_length();
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
}

bool2 test_too_many_arg(float2 p0)
{
return __builtin_hlsl_elementwise_length(p0, p0);
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
}

bool builtin_bool_to_float_type_promotion(bool p1)
{
return __builtin_hlsl_elementwise_length(p1);
// expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}}
}

bool builtin_length_int_to_float_promotion(int p1)
{
return __builtin_hlsl_elementwise_length(p1);
// expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
}

bool2 builtin_length_int2_to_float2_promotion(int2 p1)
{
return __builtin_hlsl_elementwise_length(p1);
// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
}
2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsDirectX.td
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ def int_dx_isinf :
def int_dx_lerp : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
[IntrNoMem, IntrWillReturn] >;

def int_dx_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty]>;

def int_dx_imad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_dx_rcp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsSPIRV.td
Original file line number Diff line number Diff line change
Expand Up @@ -63,5 +63,7 @@ let TargetPrefix = "spv" in {
def int_spv_frac : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
def int_spv_lerp : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
[IntrNoMem, IntrWillReturn] >;
def int_spv_length : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
[IntrNoMem, IntrWillReturn] >;
def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
}
39 changes: 39 additions & 0 deletions llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::dx_clamp:
case Intrinsic::dx_uclamp:
case Intrinsic::dx_lerp:
case Intrinsic::dx_length:
case Intrinsic::dx_sdot:
case Intrinsic::dx_udot:
return true;
Expand Down Expand Up @@ -157,6 +158,42 @@ static bool expandAnyIntrinsic(CallInst *Orig) {
return true;
}

static bool expandLengthIntrinsic(CallInst *Orig) {
Value *X = Orig->getOperand(0);
IRBuilder<> Builder(Orig->getParent());
Builder.SetInsertPoint(Orig);
Type *Ty = X->getType();
Type *EltTy = Ty->getScalarType();

// Though dx.length does work on scalar type, we can optimize it to just emit
// fabs, in CGBuiltin.cpp. We shouldn't see a scalar type here because
// CGBuiltin.cpp should have emitted a fabs call.
assert(Ty->isVectorTy() && "dx.length only works on vector type");
Value *Elt = Builder.CreateExtractElement(X, (uint64_t)0);
auto *XVec = dyn_cast<FixedVectorType>(Ty);
unsigned size = XVec->getNumElements();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Style nit: s/size/Size/

if (size > 1) {
Value *Sum = Builder.CreateFMul(Elt, Elt);
for (unsigned i = 1; i < size; i++) {
Elt = Builder.CreateExtractElement(X, i);
Value *Mul = Builder.CreateFMul(Elt, Elt);
Sum = Builder.CreateFAdd(Sum, Mul);
}
Value *Result = Builder.CreateIntrinsic(
EltTy, Intrinsic::sqrt, ArrayRef<Value *>{Sum}, nullptr, "elt.sqrt");

Orig->replaceAllUsesWith(Result);
Orig->eraseFromParent();
return true;
} else {
Value *Result = Builder.CreateIntrinsic(
EltTy, Intrinsic::fabs, ArrayRef<Value *>{Elt}, nullptr, "elt.abs");
Orig->replaceAllUsesWith(Result);
Orig->eraseFromParent();
return true;
}
}

static bool expandLerpIntrinsic(CallInst *Orig) {
Value *X = Orig->getOperand(0);
Value *Y = Orig->getOperand(1);
Expand Down Expand Up @@ -280,6 +317,8 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
return expandClampIntrinsic(Orig, F.getIntrinsicID());
case Intrinsic::dx_lerp:
return expandLerpIntrinsic(Orig);
case Intrinsic::dx_length:
return expandLengthIntrinsic(Orig);
case Intrinsic::dx_sdot:
case Intrinsic::dx_udot:
return expandIntegerDot(Orig, F.getIntrinsicID());
Expand Down
Loading