@@ -29097,9 +29097,60 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId)
29097
29097
// GetOperForHWIntrinsicId: Returns oper based on the intrinsic ID and base type
29098
29098
//
29099
29099
// Arguments:
29100
- // id - The intrinsic ID for which to get the oper
29101
- // simdBaseType - The base type on which id is executed
29102
- // isScalar - On return, contains true if the oper is over scalar data; otherwise false
29100
+ // isScalar - On return, contains true if the oper is over scalar data; otherwise false
29101
+ // getEffectiveOp - true to check for certain special patterns and return the effective operation
29102
+ //
29103
+ // Returns:
29104
+ // The oper based on the intrinsic ID and base type
29105
+ //
29106
+ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(bool* isScalar, bool getEffectiveOp) const
29107
+ {
29108
+ var_types simdBaseType = GetSimdBaseType();
29109
+ genTreeOps oper = GetOperForHWIntrinsicId(GetHWIntrinsicId(), simdBaseType, isScalar);
29110
+
29111
+ if (getEffectiveOp)
29112
+ {
29113
+ if (oper == GT_SUB) // integral (Zero - op2) is reported as GT_NEG
29114
+ {
29115
+ GenTree* op1 = Op(1);
29116
+
29117
+ if (varTypeIsIntegral(simdBaseType))
29118
+ {
29119
+ if (op1->IsVectorZero())
29120
+ {
29121
+ oper = GT_NEG;
29122
+ }
29123
+ else if (*isScalar && op1->IsCnsVec() && op1->AsVecCon()->IsScalarZero(simdBaseType)) // test the out value, not the pointer
29124
+ {
29125
+ oper = GT_NEG;
29126
+ }
29127
+ }
29128
+ }
29129
+ else if (oper == GT_XOR) // (op1 ^ AllBitsSet) is GT_NOT; floating (op1 ^ -0.0) is GT_NEG
29130
+ {
29131
+ GenTree* op2 = Op(2);
29132
+
29133
+ if (op2->IsVectorAllBitsSet())
29134
+ {
29135
+ oper = GT_NOT;
29136
+ }
29137
+ else if (varTypeIsFloating(simdBaseType) && op2->IsVectorNegativeZero(simdBaseType))
29138
+ {
29139
+ oper = GT_NEG;
29140
+ }
29141
+ }
29142
+ }
29143
+
29144
+ return oper;
29145
+ }
29146
+
29147
+ //------------------------------------------------------------------------------
29148
+ // GetOperForHWIntrinsicId: Returns oper based on the intrinsic ID and base type
29149
+ //
29150
+ // Arguments:
29151
+ // id - The intrinsic ID for which to get the oper
29152
+ // simdBaseType - The base type on which id is executed
29153
+ // isScalar - On return, contains true if the oper is over scalar data; otherwise false
29103
29154
//
29104
29155
// Returns:
29105
29156
// The oper based on the intrinsic ID and base type
@@ -30405,6 +30456,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
30405
30456
// simdBaseType - The base type on which oper is executed
30406
30457
// simdSize - The simd size on which oper is executed
30407
30458
// isScalar - True if the oper is over scalar data; otherwise false
30459
+ // reverseCond - True if the oper should be reversed; otherwise false
30408
30460
//
30409
30461
// Returns:
30410
30462
// The intrinsic ID based on the oper, base type, and simd size
@@ -30416,7 +30468,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
30416
30468
GenTree* op2,
30417
30469
var_types simdBaseType,
30418
30470
unsigned simdSize,
30419
- bool isScalar)
30471
+ bool isScalar,
30472
+ bool reverseCond)
30420
30473
{
30421
30474
var_types simdType = comp->getSIMDTypeForSize(simdSize);
30422
30475
assert(varTypeIsMask(type) || (type == simdType));
@@ -30453,6 +30506,27 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
30453
30506
30454
30507
NamedIntrinsic id = NI_Illegal;
30455
30508
30509
+ if (reverseCond)
30510
+ {
30511
+ oper = ReverseRelop(oper);
30512
+
30513
+ if (varTypeIsIntegral(simdBaseType))
30514
+ {
30515
+ reverseCond = false;
30516
+ }
30517
+ #if defined(TARGET_ARM64)
30518
+ else if (oper != GT_EQ)
30519
+ {
30520
+ // Unlike xarch, there is no reverse comparison
30521
+ // for floating-point and so we cannot actually
30522
+ // optimize these. The exception is GT_NE which
30523
+ // becomes GT_EQ
30524
+
30525
+ return NI_Illegal;
30526
+ }
30527
+ #endif
30528
+ }
30529
+
30456
30530
switch (oper)
30457
30531
{
30458
30532
case GT_EQ:
@@ -30507,7 +30581,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
30507
30581
#if defined(TARGET_XARCH)
30508
30582
if (varTypeIsMask(type))
30509
30583
{
30510
- id = NI_AVX512_CompareGreaterThanOrEqualMask;
30584
+ id = reverseCond ? NI_AVX512_CompareNotLessThanMask : NI_AVX512_CompareGreaterThanOrEqualMask;
30511
30585
}
30512
30586
else if (varTypeIsIntegral(simdBaseType))
30513
30587
{
@@ -30516,11 +30590,15 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
30516
30590
}
30517
30591
else if (simdSize == 32)
30518
30592
{
30519
- id = NI_AVX_CompareGreaterThanOrEqual;
30593
+ id = reverseCond ? NI_AVX_CompareNotLessThan : NI_AVX_CompareGreaterThanOrEqual;
30594
+ }
30595
+ else if (isScalar)
30596
+ {
30597
+ id = reverseCond ? NI_X86Base_CompareScalarNotLessThan : NI_X86Base_CompareScalarGreaterThanOrEqual;
30520
30598
}
30521
30599
else
30522
30600
{
30523
- id = isScalar ? NI_X86Base_CompareScalarGreaterThanOrEqual : NI_X86Base_CompareGreaterThanOrEqual;
30601
+ id = reverseCond ? NI_X86Base_CompareNotLessThan : NI_X86Base_CompareGreaterThanOrEqual;
30524
30602
}
30525
30603
#elif defined(TARGET_ARM64)
30526
30604
if (genTypeSize(simdBaseType) == 8)
@@ -30543,7 +30621,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
30543
30621
#if defined(TARGET_XARCH)
30544
30622
if (varTypeIsMask(type))
30545
30623
{
30546
- id = NI_AVX512_CompareGreaterThanMask;
30624
+ id = reverseCond ? NI_AVX512_CompareNotLessThanOrEqualMask : NI_AVX512_CompareGreaterThanMask;
30547
30625
}
30548
30626
else if (varTypeIsIntegral(simdBaseType))
30549
30627
{
@@ -30574,11 +30652,15 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
30574
30652
}
30575
30653
else if (simdSize == 32)
30576
30654
{
30577
- id = NI_AVX_CompareGreaterThan;
30655
+ id = reverseCond ? NI_AVX_CompareNotLessThanOrEqual : NI_AVX_CompareGreaterThan;
30656
+ }
30657
+ else if (isScalar)
30658
+ {
30659
+ id = reverseCond ? NI_X86Base_CompareScalarNotLessThanOrEqual : NI_X86Base_CompareScalarGreaterThan;
30578
30660
}
30579
30661
else
30580
30662
{
30581
- id = isScalar ? NI_X86Base_CompareScalarGreaterThan : NI_X86Base_CompareGreaterThan;
30663
+ id = reverseCond ? NI_X86Base_CompareNotLessThanOrEqual : NI_X86Base_CompareGreaterThan;
30582
30664
}
30583
30665
#elif defined(TARGET_ARM64)
30584
30666
if (genTypeSize(simdBaseType) == 8)
@@ -30600,7 +30682,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
30600
30682
#if defined(TARGET_XARCH)
30601
30683
if (varTypeIsMask(type))
30602
30684
{
30603
- id = NI_AVX512_CompareLessThanOrEqualMask;
30685
+ id = reverseCond ? NI_AVX512_CompareNotGreaterThanMask : NI_AVX512_CompareLessThanOrEqualMask;
30604
30686
}
30605
30687
else if (varTypeIsIntegral(simdBaseType))
30606
30688
{
@@ -30609,11 +30691,15 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
30609
30691
}
30610
30692
else if (simdSize == 32)
30611
30693
{
30612
- id = NI_AVX_CompareLessThanOrEqual;
30694
+ id = reverseCond ? NI_AVX_CompareNotGreaterThan : NI_AVX_CompareLessThanOrEqual;
30695
+ }
30696
+ else if (isScalar)
30697
+ {
30698
+ id = reverseCond ? NI_X86Base_CompareScalarNotGreaterThan : NI_X86Base_CompareScalarLessThanOrEqual;
30613
30699
}
30614
30700
else
30615
30701
{
30616
- id = isScalar ? NI_X86Base_CompareScalarLessThanOrEqual : NI_X86Base_CompareLessThanOrEqual;
30702
+ id = reverseCond ? NI_X86Base_CompareNotGreaterThan : NI_X86Base_CompareLessThanOrEqual;
30617
30703
}
30618
30704
#elif defined(TARGET_ARM64)
30619
30705
if (genTypeSize(simdBaseType) == 8)
@@ -30633,10 +30719,12 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
30633
30719
{
30634
30720
assert(op2->TypeIs(simdType));
30635
30721
30722
+ // !GE
30723
+
30636
30724
#if defined(TARGET_XARCH)
30637
30725
if (varTypeIsMask(type))
30638
30726
{
30639
- id = NI_AVX512_CompareLessThanMask;
30727
+ id = reverseCond ? NI_AVX512_CompareNotGreaterThanOrEqualMask : NI_AVX512_CompareLessThanMask;
30640
30728
}
30641
30729
else if (varTypeIsIntegral(simdBaseType))
30642
30730
{
@@ -30667,11 +30755,15 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
30667
30755
}
30668
30756
else if (simdSize == 32)
30669
30757
{
30670
- id = NI_AVX_CompareLessThan;
30758
+ id = reverseCond ? NI_AVX_CompareNotGreaterThanOrEqual : NI_AVX_CompareLessThan;
30759
+ }
30760
+ else if (isScalar)
30761
+ {
30762
+ id = reverseCond ? NI_X86Base_CompareScalarNotGreaterThanOrEqual : NI_X86Base_CompareScalarLessThan;
30671
30763
}
30672
30764
else
30673
30765
{
30674
- id = isScalar ? NI_X86Base_CompareScalarLessThan : NI_X86Base_CompareLessThan;
30766
+ id = reverseCond ? NI_X86Base_CompareNotGreaterThanOrEqual : NI_X86Base_CompareLessThan;
30675
30767
}
30676
30768
#elif defined(TARGET_ARM64)
30677
30769
if (genTypeSize(simdBaseType) == 8)
@@ -30729,6 +30821,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
30729
30821
// type - The expected IR type of the comparison
30730
30822
// simdBaseType - The base type on which oper is executed
30731
30823
// simdSize - The simd size on which oper is executed
30824
+ // reverseCond - True if the oper should be reversed; otherwise false
30732
30825
//
30733
30826
// Returns:
30734
30827
// The lookup type for the given operation given the expected IR type, base type, and simd size
@@ -30739,7 +30832,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp,
30739
30832
// may expect a TYP_SIMD16 but the underlying instruction may produce a TYP_MASK.
30740
30833
//
30741
30834
var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp(
30742
- Compiler* comp, genTreeOps oper, var_types type, var_types simdBaseType, unsigned simdSize)
30835
+ Compiler* comp, genTreeOps oper, var_types type, var_types simdBaseType, unsigned simdSize, bool reverseCond)
30743
30836
{
30744
30837
var_types simdType = comp->getSIMDTypeForSize(simdSize);
30745
30838
assert(varTypeIsMask(type) || (type == simdType));
@@ -30750,6 +30843,11 @@ var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp(
30750
30843
var_types lookupType = type;
30751
30844
30752
30845
#if defined(TARGET_XARCH)
30846
+ if (reverseCond)
30847
+ {
30848
+ oper = ReverseRelop(oper);
30849
+ }
30850
+
30753
30851
switch (oper)
30754
30852
{
30755
30853
case GT_EQ:
0 commit comments