Skip to content

Commit 73bf54f

Browse files
authored
Add blsr (#63545)
1 parent bb5ca4c commit 73bf54f

File tree

7 files changed

+168
-22
lines changed

7 files changed

+168
-22
lines changed

src/coreclr/jit/emitxarch.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ bool IsDstDstSrcAVXInstruction(instruction ins);
193193
bool IsDstSrcSrcAVXInstruction(instruction ins);
194194
bool HasRegularWideForm(instruction ins);
195195
bool HasRegularWideImmediateForm(instruction ins);
196-
bool DoesWriteZeroFlag(instruction ins);
196+
static bool DoesWriteZeroFlag(instruction ins);
197197
bool DoesWriteSignFlag(instruction ins);
198198
bool DoesResetOverflowAndCarryFlags(instruction ins);
199199
bool IsFlagsAlwaysModified(instrDesc* id);

src/coreclr/jit/hwintrinsic.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,25 @@ struct HWIntrinsicInfo
577577
return lookup(id).ins[type - TYP_BYTE];
578578
}
579579

580+
// Looks up the instruction for a hardware intrinsic node.
// Picks the type used for the lookup from the node itself and then defers
// to the (NamedIntrinsic, var_types) overload of lookupIns.
//
// Arguments:
//    intrinsicNode - the hardware intrinsic node; must not be null
//
// Return Value:
//    Whatever lookupIns(intrinsic, type) returns for the chosen type.
static instruction lookupIns(GenTreeHWIntrinsic* intrinsicNode)
581+
{
582+
assert(intrinsicNode != nullptr);
583+
584+
NamedIntrinsic intrinsic = intrinsicNode->GetHWIntrinsicId();
585+
var_types type = TYP_UNKNOWN;
586+
587+
// Scalar-category intrinsics are keyed by the node's own type;
// every other category is keyed by the node's SIMD base type.
if (lookupCategory(intrinsic) == HW_Category_Scalar)
588+
{
589+
type = intrinsicNode->TypeGet();
590+
}
591+
else
592+
{
593+
type = intrinsicNode->GetSimdBaseType();
594+
}
595+
596+
return lookupIns(intrinsic, type);
597+
}
598+
580599
static HWIntrinsicCategory lookupCategory(NamedIntrinsic id)
581600
{
582601
return lookup(id).category;

src/coreclr/jit/instrsxarch.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE
595595
INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT
596596
INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Extract Lowest Set Isolated Bit
597597
INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Get Mask Up to Lowest Set Bit
598-
INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit
598+
INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit
599599
INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_Flags_IsDstDstSrcAVXInstruction) // Bit Field Extract
600600

601601
// BMI2

src/coreclr/jit/lower.cpp

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
139139
case GT_AND:
140140
case GT_OR:
141141
case GT_XOR:
142-
return LowerBinaryArithmetic(node->AsOp());
142+
return LowerBinaryArithmeticCommon(node->AsOp());
143143

144144
case GT_MUL:
145145
case GT_MULHI:
@@ -2708,10 +2708,16 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp)
27082708

27092709
if (op2->IsIntegralConst(0) && (op1->gtNext == op2) && (op2->gtNext == cmp) &&
27102710
#ifdef TARGET_XARCH
2711-
op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG))
2711+
(op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG)
2712+
#ifdef FEATURE_HW_INTRINSICS
2713+
|| (op1->OperIs(GT_HWINTRINSIC) &&
2714+
emitter::DoesWriteZeroFlag(HWIntrinsicInfo::lookupIns(op1->AsHWIntrinsic())))
2715+
#endif // FEATURE_HW_INTRINSICS
2716+
)
27122717
#else // TARGET_ARM64
2713-
op1->OperIs(GT_AND, GT_ADD, GT_SUB))
2718+
op1->OperIs(GT_AND, GT_ADD, GT_SUB)
27142719
#endif
2720+
)
27152721
{
27162722
op1->gtFlags |= GTF_SET_FLAGS;
27172723
op1->SetUnusedValue();
@@ -5117,7 +5123,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node)
51175123
}
51185124

51195125
//------------------------------------------------------------------------
5120-
// LowerBinaryArithmetic: lowers the given binary arithmetic node.
5126+
// LowerBinaryArithmeticCommon: lowers the given binary arithmetic node.
51215127
//
51225128
// Recognizes opportunities for using target-independent "combined" nodes
51235129
// (currently AND_NOT on ARMArch). Performs containment checks.
@@ -5128,41 +5134,39 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node)
51285134
// Returns:
51295135
// The next node to lower.
51305136
//
5131-
GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* node)
5137+
GenTree* Lowering::LowerBinaryArithmeticCommon(GenTreeOp* binOp)
51325138
{
51335139
// TODO-CQ-XArch: support BMI1 "andn" in codegen and condition
51345140
// this logic on the support for the instruction set on XArch.
51355141
CLANG_FORMAT_COMMENT_ANCHOR;
51365142

51375143
#ifdef TARGET_ARMARCH
5138-
if (comp->opts.OptimizationEnabled() && node->OperIs(GT_AND))
5144+
if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND))
51395145
{
51405146
GenTree* opNode = nullptr;
51415147
GenTree* notNode = nullptr;
5142-
if (node->gtGetOp1()->OperIs(GT_NOT))
5148+
if (binOp->gtGetOp1()->OperIs(GT_NOT))
51435149
{
5144-
notNode = node->gtGetOp1();
5145-
opNode = node->gtGetOp2();
5150+
notNode = binOp->gtGetOp1();
5151+
opNode = binOp->gtGetOp2();
51465152
}
5147-
else if (node->gtGetOp2()->OperIs(GT_NOT))
5153+
else if (binOp->gtGetOp2()->OperIs(GT_NOT))
51485154
{
5149-
notNode = node->gtGetOp2();
5150-
opNode = node->gtGetOp1();
5155+
notNode = binOp->gtGetOp2();
5156+
opNode = binOp->gtGetOp1();
51515157
}
51525158

51535159
if (notNode != nullptr)
51545160
{
5155-
node->gtOp1 = opNode;
5156-
node->gtOp2 = notNode->AsUnOp()->gtGetOp1();
5157-
node->ChangeOper(GT_AND_NOT);
5161+
binOp->gtOp1 = opNode;
5162+
binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1();
5163+
binOp->ChangeOper(GT_AND_NOT);
51585164
BlockRange().Remove(notNode);
51595165
}
51605166
}
5161-
#endif // TARGET_ARMARCH
5162-
5163-
ContainCheckBinary(node);
5167+
#endif
51645168

5165-
return node->gtNext;
5169+
return LowerBinaryArithmetic(binOp);
51665170
}
51675171

51685172
//------------------------------------------------------------------------

src/coreclr/jit/lower.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,8 @@ class Lowering final : public Phase
297297
void LowerStoreIndir(GenTreeStoreInd* node);
298298
GenTree* LowerAdd(GenTreeOp* node);
299299
GenTree* LowerMul(GenTreeOp* mul);
300-
GenTree* LowerBinaryArithmetic(GenTreeOp* node);
300+
GenTree* LowerBinaryArithmeticCommon(GenTreeOp* binOp);
301+
GenTree* LowerBinaryArithmetic(GenTreeOp* binOp);
301302
bool LowerUnsignedDivOrMod(GenTreeOp* divMod);
302303
GenTree* LowerConstIntDivOrMod(GenTree* node);
303304
GenTree* LowerSignedDivOrMod(GenTree* node);
@@ -343,6 +344,7 @@ class Lowering final : public Phase
343344
void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node);
344345
void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node);
345346
void LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node);
347+
GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* binOp);
346348
#elif defined(TARGET_ARM64)
347349
bool IsValidConstForMovImm(GenTreeHWIntrinsic* node);
348350
void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node);

src/coreclr/jit/lowerarmarch.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,22 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul)
281281
return mul->gtNext;
282282
}
283283

284+
//------------------------------------------------------------------------
285+
// LowerBinaryArithmetic: lowers the given binary arithmetic node.
286+
//
287+
// Arguments:
288+
// binOp - the arithmetic node to lower
289+
//
290+
// Returns:
291+
// The next node to lower.
292+
//
293+
// This target's version only runs the binary containment check on binOp
// and then hands back the node that follows it.
GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
294+
{
295+
ContainCheckBinary(binOp);
296+
297+
// binOp itself is never replaced here, so its successor is the next node to lower.
return binOp->gtNext;
298+
}
299+
284300
//------------------------------------------------------------------------
285301
// LowerBlockStore: Lower a block store node
286302
//

src/coreclr/jit/lowerxarch.cpp

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,33 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul)
159159
return mul->gtNext;
160160
}
161161

162+
//------------------------------------------------------------------------
163+
// LowerBinaryArithmetic: lowers the given binary arithmetic node.
164+
//
165+
// Arguments:
166+
// binOp - the arithmetic node to lower
167+
//
168+
// Returns:
169+
// The next node to lower.
170+
//
171+
// With FEATURE_HW_INTRINSICS and optimizations enabled, an integral GT_AND is
// first offered to TryLowerAndOpToResetLowestSetBit. If that produces a
// replacement node, lowering continues after the replacement; otherwise the
// node only gets the binary containment check.
GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
172+
{
173+
#ifdef FEATURE_HW_INTRINSICS
174+
if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND) && varTypeIsIntegral(binOp))
175+
{
176+
GenTree* blsrNode = TryLowerAndOpToResetLowestSetBit(binOp);
177+
if (blsrNode != nullptr)
178+
{
179+
// binOp was replaced, so continue from the node after the replacement
// and skip the containment check on binOp below.
return blsrNode->gtNext;
180+
}
181+
}
182+
#endif
183+
184+
ContainCheckBinary(binOp);
185+
186+
return binOp->gtNext;
187+
}
188+
162189
//------------------------------------------------------------------------
163190
// LowerBlockStore: Lower a block store node
164191
//
@@ -3697,6 +3724,84 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node)
36973724
LowerNode(cast);
36983725
}
36993726
}
3727+
3728+
//----------------------------------------------------------------------------------------------
3729+
// Lowering::TryLowerAndOpToResetLowestSetBit: Lowers a tree AND(X, ADD(X, -1)) to HWIntrinsic::ResetLowestSetBit
3730+
//
3731+
// Arguments:
3732+
// andNode - GT_AND node of integral type
3733+
//
3734+
// Return Value:
3735+
// Returns the replacement node if one is created else nullptr indicating no replacement
3736+
//
3737+
// Matches AND(lclVar, ADD(lclVar, -1)) where both operands read the same local
// and replaces it with a scalar ResetLowestSetBit hardware intrinsic node.
// Only this operand order is matched; the mirrored form with the ADD as the
// first operand is not checked here.
GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode)
3738+
{
3739+
assert(andNode->OperIs(GT_AND) && varTypeIsIntegral(andNode));
3740+
3741+
// The first operand must be a read of a local that is not address-exposed.
// NOTE(review): presumably this guarantees both reads of the local see the
// same value - confirm.
GenTree* op1 = andNode->gtGetOp1();
3742+
if (!op1->OperIs(GT_LCL_VAR) || comp->lvaGetDesc(op1->AsLclVar())->IsAddressExposed())
3743+
{
3744+
return nullptr;
3745+
}
3746+
3747+
// The second operand must be an ADD ...
GenTree* op2 = andNode->gtGetOp2();
3748+
if (!op2->OperIs(GT_ADD))
3749+
{
3750+
return nullptr;
3751+
}
3752+
3753+
// ... whose second operand is the integral constant -1 ...
GenTree* addOp2 = op2->gtGetOp2();
3754+
if (!addOp2->IsIntegralConst(-1))
3755+
{
3756+
return nullptr;
3757+
}
3758+
3759+
// ... and whose first operand reads the same local number as op1.
GenTree* addOp1 = op2->gtGetOp1();
3760+
if (!addOp1->OperIs(GT_LCL_VAR) || (addOp1->AsLclVar()->GetLclNum() != op1->AsLclVar()->GetLclNum()))
3761+
{
3762+
return nullptr;
3763+
}
3764+
3765+
// Pick the intrinsic: the X64 form for a TYP_LONG operand when BMI1_X64 is
// available, otherwise the BMI1 form when BMI1 is available.
// NOTE(review): a TYP_LONG operand with BMI1 but without BMI1_X64 would take
// the BMI1 branch - confirm that combination cannot reach this point.
NamedIntrinsic intrinsic;
3766+
if (op1->TypeIs(TYP_LONG) && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64))
3767+
{
3768+
intrinsic = NamedIntrinsic::NI_BMI1_X64_ResetLowestSetBit;
3769+
}
3770+
else if (comp->compOpportunisticallyDependsOn(InstructionSet_BMI1))
3771+
{
3772+
intrinsic = NamedIntrinsic::NI_BMI1_ResetLowestSetBit;
3773+
}
3774+
else
3775+
{
3776+
return nullptr;
3777+
}
3778+
3779+
// No replacement is made when no use of andNode is found in the block range.
LIR::Use use;
3780+
if (!BlockRange().TryGetUse(andNode, &use))
3781+
{
3782+
return nullptr;
3783+
}
3784+
3785+
// op1 is reused as the single operand of the new intrinsic node.
GenTreeHWIntrinsic* blsrNode = comp->gtNewScalarHWIntrinsicNode(andNode->TypeGet(), op1, intrinsic);
3786+
3787+
JITDUMP("Lower: optimize AND(X, ADD(X, -1))\n");
3788+
DISPNODE(andNode);
3789+
JITDUMP("to:\n");
3790+
DISPNODE(blsrNode);
3791+
3792+
use.ReplaceWith(blsrNode);
3793+
3794+
// Put the new node where the AND was, then drop the AND, the ADD, the ADD's
// read of the local and the -1 constant. op1 stays in the range.
BlockRange().InsertBefore(andNode, blsrNode);
3795+
BlockRange().Remove(andNode);
3796+
BlockRange().Remove(op2);
3797+
BlockRange().Remove(addOp1);
3798+
BlockRange().Remove(addOp2);
3799+
3800+
ContainCheckHWIntrinsic(blsrNode);
3801+
3802+
return blsrNode;
3803+
}
3804+
37003805
#endif // FEATURE_HW_INTRINSICS
37013806

37023807
//----------------------------------------------------------------------------------------------

0 commit comments

Comments
 (0)