Skip to content

[DirectX] Implement llvm.is.fpclass lowering for the fcNegZero FPClassTest and the IsNaN, IsInf, IsFinite, IsNormal DXIL ops #138048

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions llvm/lib/Target/DirectX/DXIL.td
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,15 @@ def Saturate : DXILOp<7, unary> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

// DXIL opcode 8 in the isSpecialFloat op class: NaN test on a half or float
// operand, producing an i1.
// This record sets no `intrinsics` selector; the op is emitted by
// lowerIsFPClass in DXILOpLowering.cpp for the FPClassTest::fcNan test.
def IsNaN : DXILOp<8, isSpecialFloat> {
let Doc = "Determines if the specified value is NaN.";
let arguments = [OverloadTy];
let result = Int1Ty;
let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
let stages = [Stages<DXIL1_0, [all_stages]>];
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

def IsInf : DXILOp<9, isSpecialFloat> {
let Doc = "Determines if the specified value is infinite.";
let intrinsics = [IntrinSelect<int_dx_isinf>];
Expand All @@ -432,6 +441,24 @@ def IsInf : DXILOp<9, isSpecialFloat> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

// DXIL opcode 10 in the isSpecialFloat op class: finiteness test on a half or
// float operand, producing an i1.
// This record sets no `intrinsics` selector; the op is emitted by
// lowerIsFPClass in DXILOpLowering.cpp for the FPClassTest::fcFinite test.
def IsFinite : DXILOp<10, isSpecialFloat> {
let Doc = "Determines if the specified value is finite.";
let arguments = [OverloadTy];
let result = Int1Ty;
let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
let stages = [Stages<DXIL1_0, [all_stages]>];
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

// DXIL opcode 11 in the isSpecialFloat op class: "is normal" test on a half or
// float operand, producing an i1.
// This record sets no `intrinsics` selector; the op is emitted by
// lowerIsFPClass in DXILOpLowering.cpp for the FPClassTest::fcNormal test.
def IsNormal : DXILOp<11, isSpecialFloat> {
let Doc = "Determines if the specified value is normal.";
let arguments = [OverloadTy];
let result = Int1Ty;
let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
let stages = [Stages<DXIL1_0, [all_stages]>];
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

def Cos : DXILOp<12, unary> {
let Doc = "Returns cosine(theta) for theta in radians.";
let intrinsics = [IntrinSelect<int_cos>];
Expand Down
57 changes: 57 additions & 0 deletions llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::abs:
case Intrinsic::atan2:
case Intrinsic::exp:
case Intrinsic::is_fpclass:
case Intrinsic::log:
case Intrinsic::log10:
case Intrinsic::pow:
Expand Down Expand Up @@ -273,6 +274,59 @@ static Value *expandExpIntrinsic(CallInst *Orig) {
return Exp2Call;
}

/// Expand an llvm.is.fpclass call whose class test has no dedicated DXIL op.
///
/// \param Orig The llvm.is.fpclass call. Operand 0 is the floating-point value
///             (scalar or fixed vector); operand 1 is the FPClassTest mask.
/// \returns The value that replaces \p Orig, or nullptr when the test is one
///          of fcInf, fcNan, fcNormal or fcFinite, which are left untouched
///          for DXIL Op Lowering. Any other test that is not handled here is
///          reported as a fatal error.
static Value *expandIsFPClass(CallInst *Orig) {
  // The test mask is an immarg operand, so it is always a ConstantInt. cast<>
  // asserts that; an unchecked dyn_cast<> would be dereferenced as null.
  auto *TCI = cast<ConstantInt>(Orig->getArgOperand(1));
  const uint64_t Test = TCI->getZExtValue();

  // These FPClassTest cases have DXIL opcodes, so they will be handled in
  // DXIL Op Lowering instead.
  switch (Test) {
  case FPClassTest::fcInf:
  case FPClassTest::fcNan:
  case FPClassTest::fcNormal:
  case FPClassTest::fcFinite:
    return nullptr;
  }

  IRBuilder<> Builder(Orig);

  Value *F = Orig->getArgOperand(0);
  Type *FTy = F->getType();

  // Bit width of F, or of its element type when F is a vector.
  const unsigned BitWidth = FTy->getScalarSizeInBits();

  // Integer type (or vector of integers) with the same shape and width as F.
  Type *BitCastTy = Builder.getIntNTy(BitWidth);
  if (auto *FVecTy = dyn_cast<FixedVectorType>(FTy))
    BitCastTy = FixedVectorType::get(BitCastTy, FVecTy->getNumElements());

  switch (Test) {
  case FPClassTest::fcNegZero: {
    Value *FBitCast = Builder.CreateBitCast(F, BitCastTy);
    // -0.0 is the bit pattern with only the sign bit set. Build the mask as
    // an APInt of the exact width: shifting the int literal 1 is out of range
    // for 64-bit elements and produces a negative int for 32-bit ones.
    // ConstantInt::get splats the value when BitCastTy is a vector.
    Constant *NegZero =
        ConstantInt::get(BitCastTy, APInt::getSignMask(BitWidth));
    return Builder.CreateICmpEQ(FBitCast, NegZero, "is.fpclass.negzero");
  }
  default:
    // The test operand is a bitmask, so it can carry bits that match none of
    // the tests handled here. Keep this as an explicit default case rather
    // than hoisting the error after the switch.
    report_fatal_error(Twine("Unsupported FPClassTest"),
                       /* gen_crash_diag=*/false);
  }
}

static Value *expandAnyOrAllIntrinsic(CallInst *Orig,
Intrinsic::ID IntrinsicId) {
Value *X = Orig->getOperand(0);
Expand Down Expand Up @@ -561,6 +615,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::exp:
Result = expandExpIntrinsic(Orig);
break;
case Intrinsic::is_fpclass:
Result = expandIsFPClass(Orig);
break;
case Intrinsic::log:
Result = expandLogIntrinsic(Orig);
break;
Expand Down
47 changes: 47 additions & 0 deletions llvm/lib/Target/DirectX/DXILOpLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,50 @@ class OpLowerer {
});
}

/// Lower every call of the llvm.is.fpclass declaration \p F to the matching
/// isSpecialFloat DXIL op.
///
/// Only the fcInf, fcNan, fcNormal and fcFinite tests have a DXIL op; any
/// other test mask produces an Error for that call.
///
/// \returns The result of replaceFunction; lowerIntrinsics ORs it into its
///          HasErrors flag.
[[nodiscard]] bool lowerIsFPClass(Function &F) {
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *RetTy = IRB.getInt1Ty();

  return replaceFunction(F, [&](CallInst *CI) -> Error {
    IRB.SetInsertPoint(CI);

    // The DXIL op takes only the floating-point operand; the class test is
    // encoded in the opcode chosen below.
    SmallVector<Value *> Args;
    Args.push_back(CI->getArgOperand(0));

    // The test mask is an immarg operand, so it is always a ConstantInt.
    // cast<> asserts that; an unchecked dyn_cast<> would be dereferenced as
    // null.
    const uint64_t Test =
        cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();

    dxil::OpCode OpCode;
    switch (Test) {
    case FPClassTest::fcInf:
      // Both llvm.is.fpclass and the dx.isinf intrinsic (selected through
      // IntrinSelect in DXIL.td) lower to the IsInf DXIL op; the two
      // lowerings are independent of each other.
      OpCode = dxil::OpCode::IsInf;
      break;
    case FPClassTest::fcNan:
      OpCode = dxil::OpCode::IsNaN;
      break;
    case FPClassTest::fcNormal:
      OpCode = dxil::OpCode::IsNormal;
      break;
    case FPClassTest::fcFinite:
      OpCode = dxil::OpCode::IsFinite;
      break;
    default: {
      SmallString<128> Msg =
          formatv("Unsupported FPClassTest {0} for DXIL Op Lowering", Test);
      return make_error<StringError>(Msg, inconvertibleErrorCode());
    }
    }

    Expected<CallInst *> OpCall =
        OpBuilder.tryCreateOp(OpCode, Args, CI->getName(), RetTy);
    if (Error E = OpCall.takeError())
      return E;

    CI->replaceAllUsesWith(*OpCall);
    CI->eraseFromParent();
    return Error::success();
  });
}

bool lowerIntrinsics() {
bool Updated = false;
bool HasErrors = false;
Expand Down Expand Up @@ -799,6 +843,9 @@ class OpLowerer {
case Intrinsic::ctpop:
HasErrors |= lowerCtpopToCountBits(F);
break;
case Intrinsic::is_fpclass:
HasErrors |= lowerIsFPClass(F);
break;
}
Updated = true;
}
Expand Down
154 changes: 154 additions & 0 deletions llvm/test/CodeGen/DirectX/is_fpclass.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s


; Scalar negative-zero test (mask 32). The expected output contains no DXIL op
; call: the intrinsic is expanded into a bitcast to i32 and an integer compare
; against the sign-bit-only pattern.
define noundef i1 @isnegzero(float noundef %a) {
; CHECK-LABEL: define noundef i1 @isnegzero(
; CHECK-SAME: float noundef [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A]] to i32
; CHECK-NEXT: [[IS_FPCLASS_NEGZERO:%.*]] = icmp eq i32 [[TMP0]], -2147483648
; CHECK-NEXT: ret i1 [[IS_FPCLASS_NEGZERO]]
;
entry:
%0 = call i1 @llvm.is.fpclass.f32(float %a, i32 32)
ret i1 %0
}

; Vector negative-zero test (mask 32). The expected output performs the
; bitcast and compare per element and rebuilds the <2 x i1> result with
; insertelement.
define noundef <2 x i1> @isnegzerov2(<2 x float> noundef %a) {
; CHECK-LABEL: define noundef <2 x i1> @isnegzerov2(
; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x float> [[A]], i64 0
; CHECK-NEXT: [[DOTI0:%.*]] = bitcast float [[A_I0]] to i32
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x float> [[A]], i64 1
; CHECK-NEXT: [[DOTI1:%.*]] = bitcast float [[A_I1]] to i32
; CHECK-NEXT: [[IS_FPCLASS_NEGZERO_I0:%.*]] = icmp eq i32 [[DOTI0]], -2147483648
; CHECK-NEXT: [[IS_FPCLASS_NEGZERO_I1:%.*]] = icmp eq i32 [[DOTI1]], -2147483648
; CHECK-NEXT: [[IS_FPCLASS_NEGZERO_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[IS_FPCLASS_NEGZERO_I0]], i64 0
; CHECK-NEXT: [[IS_FPCLASS_NEGZERO:%.*]] = insertelement <2 x i1> [[IS_FPCLASS_NEGZERO_UPTO0]], i1 [[IS_FPCLASS_NEGZERO_I1]], i64 1
; CHECK-NEXT: ret <2 x i1> [[IS_FPCLASS_NEGZERO]]
;
entry:
%0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 32)
ret <2 x i1> %0
}

; Scalar NaN test (mask 3). Expected to become a single
; dx.op.isSpecialFloat call with DXIL opcode 8 (IsNaN).
define noundef i1 @isnan(float noundef %a) {
; CHECK-LABEL: define noundef i1 @isnan(
; CHECK-SAME: float noundef [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 8, float [[A]]) #[[ATTR0:[0-9]+]]
; CHECK-NEXT: ret i1 [[TMP0]]
;
entry:
%0 = call i1 @llvm.is.fpclass.f32(float %a, i32 3)
ret i1 %0
}

; Vector NaN test (mask 3). Expected to become one dx.op.isSpecialFloat call
; with DXIL opcode 8 (IsNaN) per element, with the <2 x i1> result rebuilt by
; insertelement.
define noundef <2 x i1> @isnanv2(<2 x float> noundef %a) {
; CHECK-LABEL: define noundef <2 x i1> @isnanv2(
; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x float> [[A]], i64 0
; CHECK-NEXT: [[DOTI02:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 8, float [[A_I0]]) #[[ATTR0]]
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x float> [[A]], i64 1
; CHECK-NEXT: [[DOTI11:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 8, float [[A_I1]]) #[[ATTR0]]
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[DOTI02]], i64 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i1> [[DOTUPTO0]], i1 [[DOTI11]], i64 1
; CHECK-NEXT: ret <2 x i1> [[TMP0]]
;
entry:
%0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 3)
ret <2 x i1> %0
}

; Scalar infinity test (mask 516). Expected to become a single
; dx.op.isSpecialFloat call with DXIL opcode 9 (IsInf).
define noundef i1 @isinf(float noundef %a) {
; CHECK-LABEL: define noundef i1 @isinf(
; CHECK-SAME: float noundef [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 9, float [[A]]) #[[ATTR0]]
; CHECK-NEXT: ret i1 [[TMP0]]
;
entry:
%0 = call i1 @llvm.is.fpclass.f32(float %a, i32 516)
ret i1 %0
}

; Vector infinity test (mask 516). Expected to become one
; dx.op.isSpecialFloat call with DXIL opcode 9 (IsInf) per element, with the
; <2 x i1> result rebuilt by insertelement.
define noundef <2 x i1> @isinfv2(<2 x float> noundef %a) {
; CHECK-LABEL: define noundef <2 x i1> @isinfv2(
; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x float> [[A]], i64 0
; CHECK-NEXT: [[DOTI02:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 9, float [[A_I0]]) #[[ATTR0]]
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x float> [[A]], i64 1
; CHECK-NEXT: [[DOTI11:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 9, float [[A_I1]]) #[[ATTR0]]
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[DOTI02]], i64 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i1> [[DOTUPTO0]], i1 [[DOTI11]], i64 1
; CHECK-NEXT: ret <2 x i1> [[TMP0]]
;
entry:
%0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 516)
ret <2 x i1> %0
}

; Scalar finiteness test (mask 504). Expected to become a single
; dx.op.isSpecialFloat call with DXIL opcode 10 (IsFinite).
define noundef i1 @isfinite(float noundef %a) {
; CHECK-LABEL: define noundef i1 @isfinite(
; CHECK-SAME: float noundef [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 10, float [[A]]) #[[ATTR0]]
; CHECK-NEXT: ret i1 [[TMP0]]
;
entry:
%0 = call i1 @llvm.is.fpclass.f32(float %a, i32 504)
ret i1 %0
}

; Vector finiteness test (mask 504). Expected to become one
; dx.op.isSpecialFloat call with DXIL opcode 10 (IsFinite) per element, with
; the <2 x i1> result rebuilt by insertelement.
define noundef <2 x i1> @isfinitev2(<2 x float> noundef %a) {
; CHECK-LABEL: define noundef <2 x i1> @isfinitev2(
; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x float> [[A]], i64 0
; CHECK-NEXT: [[DOTI02:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 10, float [[A_I0]]) #[[ATTR0]]
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x float> [[A]], i64 1
; CHECK-NEXT: [[DOTI11:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 10, float [[A_I1]]) #[[ATTR0]]
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[DOTI02]], i64 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i1> [[DOTUPTO0]], i1 [[DOTI11]], i64 1
; CHECK-NEXT: ret <2 x i1> [[TMP0]]
;
entry:
%0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 504)
ret <2 x i1> %0
}

; Scalar "is normal" test (mask 264). Expected to become a single
; dx.op.isSpecialFloat call with DXIL opcode 11 (IsNormal).
define noundef i1 @isnormal(float noundef %a) {
; CHECK-LABEL: define noundef i1 @isnormal(
; CHECK-SAME: float noundef [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 11, float [[A]]) #[[ATTR0]]
; CHECK-NEXT: ret i1 [[TMP0]]
;
entry:
%0 = call i1 @llvm.is.fpclass.f32(float %a, i32 264)
ret i1 %0
}

; Vector "is normal" test (mask 264). Expected to become one
; dx.op.isSpecialFloat call with DXIL opcode 11 (IsNormal) per element, with
; the <2 x i1> result rebuilt by insertelement.
define noundef <2 x i1> @isnormalv2(<2 x float> noundef %a) {
; CHECK-LABEL: define noundef <2 x i1> @isnormalv2(
; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x float> [[A]], i64 0
; CHECK-NEXT: [[DOTI02:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 11, float [[A_I0]]) #[[ATTR0]]
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x float> [[A]], i64 1
; CHECK-NEXT: [[DOTI11:%.*]] = call i1 @dx.op.isSpecialFloat.f32(i32 11, float [[A_I1]]) #[[ATTR0]]
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[DOTI02]], i64 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i1> [[DOTUPTO0]], i1 [[DOTI11]], i64 1
; CHECK-NEXT: ret <2 x i1> [[TMP0]]
;
entry:
%0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 264)
ret <2 x i1> %0
}

declare i1 @llvm.is.fpclass.f32(float, i32 immarg)
declare <2 x i1> @llvm.is.fpclass.v2f32(<2 x float>, i32 immarg)