Commit 1227d3b

Add xarch andn (#64350)
* basic functionality implemented
* add instruction format to list checked in AreFlagsSetToZeroCmp comment and tidy
* review feedback and clarify instruction flags
* change op local check to result containment
* add memory op formats and update comments
1 parent 21f8078 commit 1227d3b
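
The pattern this commit targets is AND(X, NOT(Y)). Below is a minimal C++ sketch of the source shape, with an illustrative function name that is not part of the commit; with BMI1 available, the x86/x64 JIT can now lower this to a single andn instead of a separate not followed by an and.

    // Hypothetical helper, for illustration only; not from this change.
    // Shape recognized by the new lowering: AND(X, NOT(Y)).
    unsigned ClearBits(unsigned value, unsigned mask)
    {
        return value & ~mask; // on BMI1-capable hardware this can become a single "andn"
    }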

File tree

6 files changed: +127 -55 lines changed

src/coreclr/jit/emitxarch.cpp
src/coreclr/jit/instrsxarch.h
src/coreclr/jit/lower.cpp
src/coreclr/jit/lower.h
src/coreclr/jit/lowerarmarch.cpp
src/coreclr/jit/lowerxarch.cpp

src/coreclr/jit/emitxarch.cpp

Lines changed: 4 additions & 0 deletions

@@ -423,6 +423,10 @@ bool emitter::AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, genTreeOps tr
         case IF_RWR:
         case IF_RRD:
         case IF_RRW:
+        case IF_RWR_RRD_RRD:
+        case IF_RWR_RRD_MRD:
+        case IF_RWR_RRD_ARD:
+        case IF_RWR_RRD_SRD:
             break;
         default:
             return false;
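
The new IF_RWR_RRD_* cases let AreFlagsSetToZeroCmp consider three-operand formats such as the one used by andn, whose flag behavior is declared in instrsxarch.h below. A hedged C++ sketch of the kind of source shape that benefits; the function name is hypothetical:

    // Illustration only: because andn writes ZF, a compare of its result
    // against zero may reuse the flags already set, avoiding an explicit
    // test/cmp before the branch.
    bool NoBitsOutsideMask(unsigned value, unsigned mask)
    {
        return (value & ~mask) == 0;
    }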

src/coreclr/jit/instrsxarch.h

Lines changed: 1 addition & 1 deletion

@@ -592,7 +592,7 @@ INST3(LAST_AVXVNNI_INSTRUCTION, "LAST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BA

 // BMI1
 INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
-INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT
+INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Resets_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT
 INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Extract Lowest Set Isolated Bit
 INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Get Mask Up to Lowest Set Bit
 INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit

src/coreclr/jit/lower.cpp

Lines changed: 1 addition & 48 deletions

@@ -139,7 +139,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
         case GT_AND:
         case GT_OR:
         case GT_XOR:
-            return LowerBinaryArithmeticCommon(node->AsOp());
+            return LowerBinaryArithmetic(node->AsOp());

         case GT_MUL:
         case GT_MULHI:
@@ -5133,53 +5133,6 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node)
     return nullptr;
 }

-//------------------------------------------------------------------------
-// LowerBinaryArithmeticCommon: lowers the given binary arithmetic node.
-//
-// Recognizes opportunities for using target-independent "combined" nodes
-// (currently AND_NOT on ARMArch). Performs containment checks.
-//
-// Arguments:
-//    node - the arithmetic node to lower
-//
-// Returns:
-//    The next node to lower.
-//
-GenTree* Lowering::LowerBinaryArithmeticCommon(GenTreeOp* binOp)
-{
-    // TODO-CQ-XArch: support BMI2 "andn" in codegen and condition
-    // this logic on the support for the instruction set on XArch.
-    CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef TARGET_ARMARCH
-    if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND))
-    {
-        GenTree* opNode  = nullptr;
-        GenTree* notNode = nullptr;
-        if (binOp->gtGetOp1()->OperIs(GT_NOT))
-        {
-            notNode = binOp->gtGetOp1();
-            opNode  = binOp->gtGetOp2();
-        }
-        else if (binOp->gtGetOp2()->OperIs(GT_NOT))
-        {
-            notNode = binOp->gtGetOp2();
-            opNode  = binOp->gtGetOp1();
-        }
-
-        if (notNode != nullptr)
-        {
-            binOp->gtOp1 = opNode;
-            binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1();
-            binOp->ChangeOper(GT_AND_NOT);
-            BlockRange().Remove(notNode);
-        }
-    }
-#endif
-
-    return LowerBinaryArithmetic(binOp);
-}
-
 //------------------------------------------------------------------------
 // LowerUnsignedDivOrMod: Lowers a GT_UDIV/GT_UMOD node.
 //

src/coreclr/jit/lower.h

Lines changed: 2 additions & 2 deletions

@@ -297,7 +297,6 @@ class Lowering final : public Phase
     void LowerStoreIndir(GenTreeStoreInd* node);
     GenTree* LowerAdd(GenTreeOp* node);
     GenTree* LowerMul(GenTreeOp* mul);
-    GenTree* LowerBinaryArithmeticCommon(GenTreeOp* binOp);
     GenTree* LowerBinaryArithmetic(GenTreeOp* binOp);
     bool LowerUnsignedDivOrMod(GenTreeOp* divMod);
     GenTree* LowerConstIntDivOrMod(GenTree* node);
@@ -344,7 +343,8 @@ class Lowering final : public Phase
     void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node);
     void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node);
     void LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node);
-    GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* binOp);
+    GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode);
+    GenTree* TryLowerAndOpToAndNot(GenTreeOp* andNode);
 #elif defined(TARGET_ARM64)
     bool IsValidConstForMovImm(GenTreeHWIntrinsic* node);
     void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node);

src/coreclr/jit/lowerarmarch.cpp

Lines changed: 24 additions & 0 deletions

@@ -292,6 +292,30 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul)
 //
 GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
 {
+    if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND))
+    {
+        GenTree* opNode  = nullptr;
+        GenTree* notNode = nullptr;
+        if (binOp->gtGetOp1()->OperIs(GT_NOT))
+        {
+            notNode = binOp->gtGetOp1();
+            opNode  = binOp->gtGetOp2();
+        }
+        else if (binOp->gtGetOp2()->OperIs(GT_NOT))
+        {
+            notNode = binOp->gtGetOp2();
+            opNode  = binOp->gtGetOp1();
+        }
+
+        if (notNode != nullptr)
+        {
+            binOp->gtOp1 = opNode;
+            binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1();
+            binOp->ChangeOper(GT_AND_NOT);
+            BlockRange().Remove(notNode);
+        }
+    }
+
     ContainCheckBinary(binOp);

     return binOp->gtNext;

src/coreclr/jit/lowerxarch.cpp

Lines changed: 95 additions & 4 deletions

@@ -162,6 +162,9 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul)
 //------------------------------------------------------------------------
 // LowerBinaryArithmetic: lowers the given binary arithmetic node.
 //
+// Recognizes opportunities for using target-independent "combined" nodes
+// Performs containment checks.
+//
 // Arguments:
 //    node - the arithmetic node to lower
 //
@@ -173,10 +176,16 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
 #ifdef FEATURE_HW_INTRINSICS
     if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND) && varTypeIsIntegral(binOp))
     {
-        GenTree* blsrNode = TryLowerAndOpToResetLowestSetBit(binOp);
-        if (blsrNode != nullptr)
+        GenTree* replacementNode = TryLowerAndOpToAndNot(binOp);
+        if (replacementNode != nullptr)
+        {
+            return replacementNode->gtNext;
+        }
+
+        replacementNode = TryLowerAndOpToResetLowestSetBit(binOp);
+        if (replacementNode != nullptr)
         {
-            return blsrNode->gtNext;
+            return replacementNode->gtNext;
         }
     }
 #endif
@@ -3726,14 +3735,16 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node)
 }

 //----------------------------------------------------------------------------------------------
-// Lowering::TryLowerAndOpToResetLowestSetBit: Lowers a tree AND(X, ADD(X, -1) to HWIntrinsic::ResetLowestSetBit
+// Lowering::TryLowerAndOpToResetLowestSetBit: Lowers a tree AND(X, ADD(X, -1)) to HWIntrinsic::ResetLowestSetBit
 //
 // Arguments:
 //    andNode - GT_AND node of integral type
 //
 // Return Value:
 //    Returns the replacement node if one is created else nullptr indicating no replacement
 //
+// Notes:
+//    Performs containment checks on the replacement node if one is created
 GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode)
 {
     assert(andNode->OperIs(GT_AND) && varTypeIsIntegral(andNode));
@@ -3802,6 +3813,86 @@ GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode)
     return blsrNode;
 }

+//----------------------------------------------------------------------------------------------
+// Lowering::TryLowerAndOpToAndNot: Lowers a tree AND(X, NOT(Y)) to HWIntrinsic::AndNot
+//
+// Arguments:
+//    andNode - GT_AND node of integral type
+//
+// Return Value:
+//    Returns the replacement node if one is created else nullptr indicating no replacement
+//
+// Notes:
+//    Performs containment checks on the replacement node if one is created
+GenTree* Lowering::TryLowerAndOpToAndNot(GenTreeOp* andNode)
+{
+    assert(andNode->OperIs(GT_AND) && varTypeIsIntegral(andNode));
+
+    GenTree* opNode  = nullptr;
+    GenTree* notNode = nullptr;
+    if (andNode->gtGetOp1()->OperIs(GT_NOT))
+    {
+        notNode = andNode->gtGetOp1();
+        opNode  = andNode->gtGetOp2();
+    }
+    else if (andNode->gtGetOp2()->OperIs(GT_NOT))
+    {
+        notNode = andNode->gtGetOp2();
+        opNode  = andNode->gtGetOp1();
+    }
+
+    if (opNode == nullptr)
+    {
+        return nullptr;
+    }
+
+    // We want to avoid using "andn" when one of the operands is both a source and the destination and is also coming
+    // from memory. In this scenario, we will get smaller and likely faster code by using the RMW encoding of `and`
+    if (IsBinOpInRMWStoreInd(andNode))
+    {
+        return nullptr;
+    }
+
+    NamedIntrinsic intrinsic;
+    if (andNode->TypeIs(TYP_LONG) && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64))
+    {
+        intrinsic = NamedIntrinsic::NI_BMI1_X64_AndNot;
+    }
+    else if (comp->compOpportunisticallyDependsOn(InstructionSet_BMI1))
+    {
+        intrinsic = NamedIntrinsic::NI_BMI1_AndNot;
+    }
+    else
+    {
+        return nullptr;
+    }
+
+    LIR::Use use;
+    if (!BlockRange().TryGetUse(andNode, &use))
+    {
+        return nullptr;
+    }
+
+    // note that parameter order for andn is ~y, x so these are purposefully reversed when creating the node
+    GenTreeHWIntrinsic* andnNode =
+        comp->gtNewScalarHWIntrinsicNode(andNode->TypeGet(), notNode->AsUnOp()->gtGetOp1(), opNode, intrinsic);
+
+    JITDUMP("Lower: optimize AND(X, NOT(Y)))\n");
+    DISPNODE(andNode);
+    JITDUMP("to:\n");
+    DISPNODE(andnNode);
+
+    use.ReplaceWith(andnNode);
+
+    BlockRange().InsertBefore(andNode, andnNode);
+    BlockRange().Remove(andNode);
+    BlockRange().Remove(notNode);
+
+    ContainCheckHWIntrinsic(andnNode);
+
+    return andnNode;
+}
+
 #endif // FEATURE_HW_INTRINSICS

 //----------------------------------------------------------------------------------------------
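
The operand swap called out in TryLowerAndOpToAndNot mirrors the instruction's semantics: andn computes (~first) & second. Below is a small standalone check using the BMI1 compiler intrinsic _andn_u32 from <immintrin.h> (assumed available on a BMI1-capable toolchain, e.g. built with -mbmi); it is only an illustration of the operand order, not JIT code.

    #include <cassert>
    #include <immintrin.h>

    int main()
    {
        unsigned value = 0b1111u;
        unsigned mask  = 0b0101u;

        // andn computes (~src1) & src2, so the NOT operand goes first; the
        // lowering likewise passes NOT(Y)'s operand before X when building the node.
        unsigned viaAndn = _andn_u32(mask, value);

        assert(viaAndn == (value & ~mask)); // both evaluate to 0b1010
        return 0;
    }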
