Skip to content

Commit f5fee8f

Browse files
Update fgMorphHWIntrinsic to more closely follow fgMorphSmpOp (#116892)
* Update fgMorphHWIntrinsic to more closely follow fgMorphSmpOp * Cover some more arithmetic and logical operations for hwintrinsic morph * Add support for negating hwintrinsic compares * Ensure that reversal of floating-point hwintrinsic compare operations is done correctly * Ensure that the morphed op2 is taken * Ensure we don't create a CvtMaskToVector node with the wrong base type * Apply formatting patch * Ensure the operands are set when changing the intrinsic ID
1 parent c0eb288 commit f5fee8f

File tree

5 files changed

+979
-264
lines changed

5 files changed

+979
-264
lines changed

src/coreclr/jit/compiler.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6693,6 +6693,8 @@ class Compiler
66936693
GenTree* fgOptimizeRelationalComparisonWithFullRangeConst(GenTreeOp* cmp);
66946694
#if defined(FEATURE_HW_INTRINSICS)
66956695
GenTree* fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree);
6696+
GenTree* fgMorphHWIntrinsicRequired(GenTreeHWIntrinsic* tree);
6697+
GenTree* fgMorphHWIntrinsicOptional(GenTreeHWIntrinsic* tree);
66966698
GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node);
66976699
GenTree* fgOptimizeHWIntrinsicAssociative(GenTreeHWIntrinsic* node);
66986700
#if defined(FEATURE_MASKED_HW_INTRINSICS)

src/coreclr/jit/gentree.cpp

Lines changed: 115 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -29097,9 +29097,60 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId)
2909729097
// GetOperForHWIntrinsicId: Returns oper based on the intrinsic ID and base type
2909829098
//
2909929099
// Arguments:
29100-
// id - The intrinsic ID for which to get the oper
29101-
// simdBaseType - The base type on which id is executed
29102-
// isScalar - On return, contains true if the oper is over scalar data; otherwise false
29100+
// isScalar - On return, contains true if the oper is over scalar data; otherwise false
29101+
// getEffectiveOp - true to check for certain special patterns and return the effective operation
29102+
//
29103+
// Returns:
29104+
// The oper based on the intrinsic ID and base type
29105+
//
29106+
genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(bool* isScalar, bool getEffectiveOp) const
29107+
{
29108+
var_types simdBaseType = GetSimdBaseType();
29109+
genTreeOps oper = GetOperForHWIntrinsicId(GetHWIntrinsicId(), simdBaseType, isScalar);
29110+
29111+
if (getEffectiveOp)
29112+
{
29113+
if (oper == GT_SUB)
29114+
{
29115+
GenTree* op1 = Op(1);
29116+
29117+
if (varTypeIsIntegral(simdBaseType))
29118+
{
29119+
if (op1->IsVectorZero())
29120+
{
29121+
oper = GT_NEG;
29122+
}
29123+
else if (*isScalar && op1->IsCnsVec() && op1->AsVecCon()->IsScalarZero(simdBaseType))
29124+
{
29125+
oper = GT_NEG;
29126+
}
29127+
}
29128+
}
29129+
else if (oper == GT_XOR)
29130+
{
29131+
GenTree* op2 = Op(2);
29132+
29133+
if (op2->IsVectorAllBitsSet())
29134+
{
29135+
oper = GT_NOT;
29136+
}
29137+
else if (varTypeIsFloating(simdBaseType) && op2->IsVectorNegativeZero(simdBaseType))
29138+
{
29139+
oper = GT_NEG;
29140+
}
29141+
}
29142+
}
29143+
29144+
return oper;
29145+
}
29146+
29147+
//------------------------------------------------------------------------------
29148+
// GetOperForHWIntrinsicId: Returns oper based on the intrinsic ID and base type
29149+
//
29150+
// Arguments:
29151+
// id - The intrinsic ID for which to get the oper
29152+
// simdBaseType - The base type on which id is executed
29153+
// isScalar - On return, contains true if the oper is over scalar data; otherwise false
2910329154
//
2910429155
// Returns:
2910529156
// The oper based on the intrinsic ID and base type
@@ -30405,6 +30456,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
3040530456
// simdBaseType - The base type on which oper is executed
3040630457
// simdSize - The simd size on which oper is executed
3040730458
// isScalar - True if the oper is over scalar data; otherwise false
30459+
// reverseCond - True if the oper should be reversed; otherwise false
3040830460
//
3040930461
// Returns:
3041030462
// The intrinsic ID based on the oper, base type, and simd size
@@ -30416,7 +30468,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
3041630468
GenTree* op2,
3041730469
var_types simdBaseType,
3041830470
unsigned simdSize,
30419-
bool isScalar)
30471+
bool isScalar,
30472+
bool reverseCond)
3042030473
{
3042130474
var_types simdType = comp->getSIMDTypeForSize(simdSize);
3042230475
assert(varTypeIsMask(type) || (type == simdType));
@@ -30453,6 +30506,27 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
3045330506

3045430507
NamedIntrinsic id = NI_Illegal;
3045530508

30509+
if (reverseCond)
30510+
{
30511+
oper = ReverseRelop(oper);
30512+
30513+
if (varTypeIsIntegral(simdBaseType))
30514+
{
30515+
reverseCond = false;
30516+
}
30517+
#if defined(TARGET_ARM64)
30518+
else if (oper != GT_EQ)
30519+
{
30520+
// Unlike xarch, there is no reverse comparison
30521+
// for floating-point and so we cannot actually
30522+
// optimize these. The exception is GT_NE which
30523+
// becomes GT_EQ
30524+
30525+
return NI_Illegal;
30526+
}
30527+
#endif
30528+
}
30529+
3045630530
switch (oper)
3045730531
{
3045830532
case GT_EQ:
@@ -30507,7 +30581,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
3050730581
#if defined(TARGET_XARCH)
3050830582
if (varTypeIsMask(type))
3050930583
{
30510-
id = NI_AVX512_CompareGreaterThanOrEqualMask;
30584+
id = reverseCond ? NI_AVX512_CompareNotLessThanMask : NI_AVX512_CompareGreaterThanOrEqualMask;
3051130585
}
3051230586
else if (varTypeIsIntegral(simdBaseType))
3051330587
{
@@ -30516,11 +30590,15 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
3051630590
}
3051730591
else if (simdSize == 32)
3051830592
{
30519-
id = NI_AVX_CompareGreaterThanOrEqual;
30593+
id = reverseCond ? NI_AVX_CompareNotLessThan : NI_AVX_CompareGreaterThanOrEqual;
30594+
}
30595+
else if (isScalar)
30596+
{
30597+
id = reverseCond ? NI_X86Base_CompareScalarNotLessThan : NI_X86Base_CompareScalarGreaterThanOrEqual;
3052030598
}
3052130599
else
3052230600
{
30523-
id = isScalar ? NI_X86Base_CompareScalarGreaterThanOrEqual : NI_X86Base_CompareGreaterThanOrEqual;
30601+
id = reverseCond ? NI_X86Base_CompareNotLessThan : NI_X86Base_CompareGreaterThanOrEqual;
3052430602
}
3052530603
#elif defined(TARGET_ARM64)
3052630604
if (genTypeSize(simdBaseType) == 8)
@@ -30543,7 +30621,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
3054330621
#if defined(TARGET_XARCH)
3054430622
if (varTypeIsMask(type))
3054530623
{
30546-
id = NI_AVX512_CompareGreaterThanMask;
30624+
id = reverseCond ? NI_AVX512_CompareNotLessThanOrEqualMask : NI_AVX512_CompareGreaterThanMask;
3054730625
}
3054830626
else if (varTypeIsIntegral(simdBaseType))
3054930627
{
@@ -30574,11 +30652,15 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
3057430652
}
3057530653
else if (simdSize == 32)
3057630654
{
30577-
id = NI_AVX_CompareGreaterThan;
30655+
id = reverseCond ? NI_AVX_CompareNotLessThanOrEqual : NI_AVX_CompareGreaterThan;
30656+
}
30657+
else if (isScalar)
30658+
{
30659+
reverseCond ? NI_X86Base_CompareScalarNotLessThanOrEqual : NI_X86Base_CompareScalarGreaterThan;
3057830660
}
3057930661
else
3058030662
{
30581-
id = isScalar ? NI_X86Base_CompareScalarGreaterThan : NI_X86Base_CompareGreaterThan;
30663+
id = reverseCond ? NI_X86Base_CompareNotLessThanOrEqual : NI_X86Base_CompareGreaterThan;
3058230664
}
3058330665
#elif defined(TARGET_ARM64)
3058430666
if (genTypeSize(simdBaseType) == 8)
@@ -30600,7 +30682,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
3060030682
#if defined(TARGET_XARCH)
3060130683
if (varTypeIsMask(type))
3060230684
{
30603-
id = NI_AVX512_CompareLessThanOrEqualMask;
30685+
id = reverseCond ? NI_AVX512_CompareNotGreaterThanMask : NI_AVX512_CompareLessThanOrEqualMask;
3060430686
}
3060530687
else if (varTypeIsIntegral(simdBaseType))
3060630688
{
@@ -30609,11 +30691,15 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
3060930691
}
3061030692
else if (simdSize == 32)
3061130693
{
30612-
id = NI_AVX_CompareLessThanOrEqual;
30694+
id = reverseCond ? NI_AVX_CompareNotGreaterThan : NI_AVX_CompareLessThanOrEqual;
30695+
}
30696+
else if (isScalar)
30697+
{
30698+
id = reverseCond ? NI_X86Base_CompareScalarNotGreaterThan : NI_X86Base_CompareScalarLessThanOrEqual;
3061330699
}
3061430700
else
3061530701
{
30616-
id = isScalar ? NI_X86Base_CompareScalarLessThanOrEqual : NI_X86Base_CompareLessThanOrEqual;
30702+
id = reverseCond ? NI_X86Base_CompareNotGreaterThan : NI_X86Base_CompareLessThanOrEqual;
3061730703
}
3061830704
#elif defined(TARGET_ARM64)
3061930705
if (genTypeSize(simdBaseType) == 8)
@@ -30633,10 +30719,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
3063330719
{
3063430720
assert(op2->TypeIs(simdType));
3063530721

30722+
// !GE
30723+
3063630724
#if defined(TARGET_XARCH)
3063730725
if (varTypeIsMask(type))
3063830726
{
30639-
id = NI_AVX512_CompareLessThanMask;
30727+
id = reverseCond ? NI_AVX512_CompareNotGreaterThanOrEqualMask : NI_AVX512_CompareLessThanMask;
3064030728
}
3064130729
else if (varTypeIsIntegral(simdBaseType))
3064230730
{
@@ -30667,11 +30755,15 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
3066730755
}
3066830756
else if (simdSize == 32)
3066930757
{
30670-
id = NI_AVX_CompareLessThan;
30758+
id = reverseCond ? NI_AVX_CompareNotGreaterThanOrEqual : NI_AVX_CompareLessThan;
30759+
}
30760+
else if (isScalar)
30761+
{
30762+
id = reverseCond ? NI_X86Base_CompareScalarNotGreaterThanOrEqual : NI_X86Base_CompareScalarLessThan;
3067130763
}
3067230764
else
3067330765
{
30674-
id = isScalar ? NI_X86Base_CompareScalarLessThan : NI_X86Base_CompareLessThan;
30766+
id = reverseCond ? NI_X86Base_CompareNotGreaterThanOrEqual : NI_X86Base_CompareLessThan;
3067530767
}
3067630768
#elif defined(TARGET_ARM64)
3067730769
if (genTypeSize(simdBaseType) == 8)
@@ -30729,6 +30821,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
3072930821
// type - The expected IR type of the comparison
3073030822
// simdBaseType - The base type on which oper is executed
3073130823
// simdSize - The simd size on which oper is executed
30824+
// reverseCond - True if the oper should be reversed; otherwise false
3073230825
//
3073330826
// Returns:
3073430827
// The lookup type for the given operation given the expected IR type, base type, and simd size
@@ -30739,7 +30832,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
3073930832
// may expect a TYP_SIMD16 but the underlying instruction may produce a TYP_MASK.
3074030833
//
3074130834
var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp(
30742-
Compiler* comp, genTreeOps oper, var_types type, var_types simdBaseType, unsigned simdSize)
30835+
Compiler* comp, genTreeOps oper, var_types type, var_types simdBaseType, unsigned simdSize, bool reverseCond)
3074330836
{
3074430837
var_types simdType = comp->getSIMDTypeForSize(simdSize);
3074530838
assert(varTypeIsMask(type) || (type == simdType));
@@ -30750,6 +30843,11 @@ var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp(
3075030843
var_types lookupType = type;
3075130844

3075230845
#if defined(TARGET_XARCH)
30846+
if (reverseCond)
30847+
{
30848+
oper = ReverseRelop(oper);
30849+
}
30850+
3075330851
switch (oper)
3075430852
{
3075530853
case GT_EQ:

src/coreclr/jit/gentree.h

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6640,17 +6640,19 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
66406640
GenTree* op2,
66416641
var_types simdBaseType,
66426642
unsigned simdSize,
6643-
bool isScalar);
6643+
bool isScalar,
6644+
bool reverseCond = false);
66446645

6645-
static var_types GetLookupTypeForCmpOp(
6646-
Compiler* comp, genTreeOps oper, var_types type, var_types simdBaseType, unsigned simdSize);
6646+
static var_types GetLookupTypeForCmpOp(Compiler* comp,
6647+
genTreeOps oper,
6648+
var_types type,
6649+
var_types simdBaseType,
6650+
unsigned simdSize,
6651+
bool reverseCond = false);
66476652

66486653
static genTreeOps GetOperForHWIntrinsicId(NamedIntrinsic id, var_types simdBaseType, bool* isScalar);
66496654

6650-
genTreeOps GetOperForHWIntrinsicId(bool* isScalar) const
6651-
{
6652-
return GetOperForHWIntrinsicId(GetHWIntrinsicId(), GetSimdBaseType(), isScalar);
6653-
}
6655+
genTreeOps GetOperForHWIntrinsicId(bool* isScalar, bool getEffectiveOp = false) const;
66546656

66556657
bool ShouldConstantProp(GenTree* operand, GenTreeVecCon* vecCon);
66566658

src/coreclr/jit/lowerxarch.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9888,7 +9888,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
98889888

98899889
LIR::Use use;
98909890

9891-
if ((oper == GT_XOR) && BlockRange().TryGetUse(node, &use) &&
9891+
if ((oper == GT_XOR) && isEmbeddedBroadcastCompatible && BlockRange().TryGetUse(node, &use) &&
98929892
use.User()->OperIsVectorFusedMultiplyOp())
98939893
{
98949894
// xor is bitwise and the actual xor node might be a different base type

0 commit comments

Comments
 (0)