@@ -1471,9 +1471,23 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
1471
1471
op2 = userIntrin->Op (1 );
1472
1472
}
1473
1473
1474
- NamedIntrinsic intrinsic =
1475
- GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp (comp, GT_AND_NOT, op1, op2, simdBaseType,
1476
- simdSize, false );
1474
+ NamedIntrinsic intrinsic = NI_Illegal;
1475
+
1476
+ if (comp->IsBaselineSimdIsaSupported ())
1477
+ {
1478
+ intrinsic = GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp (comp, GT_AND_NOT, op1, op2,
1479
+ simdBaseType, simdSize, false );
1480
+ }
1481
+ else
1482
+ {
1483
+ // We need to ensure we optimize even if SSE2 is disabled
1484
+
1485
+ assert (simdBaseType == TYP_FLOAT);
1486
+ assert (simdSize <= 16 );
1487
+
1488
+ intrinsic = NI_SSE_AndNot;
1489
+ }
1490
+
1477
1491
userIntrin->ResetHWIntrinsicId (intrinsic, comp, op1, op2);
1478
1492
1479
1493
return nextNode;
@@ -1487,24 +1501,55 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
1487
1501
BlockRange ().Remove (node);
1488
1502
op3 = userIntrin->Op (2 );
1489
1503
1504
+ // Tracks which two operands get used first
1505
+ TernaryLogicUseFlags firstOpUseFlags = TernaryLogicUseFlags::AB;
1506
+
1490
1507
if (op3 == node)
1491
1508
{
1492
- op3 = userIntrin->Op (1 );
1509
+ if (userOper == GT_AND_NOT)
1510
+ {
1511
+ op3 = op2;
1512
+ op2 = op1;
1513
+ op1 = userIntrin->Op (1 );
1514
+
1515
+ // AND_NOT isn't commutative so we need to shift parameters down
1516
+ firstOpUseFlags = TernaryLogicUseFlags::BC;
1517
+ }
1518
+ else
1519
+ {
1520
+ op3 = userIntrin->Op (1 );
1521
+ }
1493
1522
}
1494
1523
1495
1524
uint8_t controlByte = 0x00 ;
1496
1525
1497
1526
if ((userOper == GT_XOR) && op3->IsVectorAllBitsSet ())
1498
1527
{
1499
- // We're being used by what is actually GT_NOT, so we
1500
- // need to shift parameters down so that A is unused
1528
+ // We have XOR(OP(A, B), AllBitsSet)
1529
+ // A: op1
1530
+ // B: op2
1531
+ // C: op3 (AllBitsSet)
1532
+ //
1533
+ // We want A to be the unused parameter so swap it around
1534
+ // A: op3 (AllBitsSet)
1535
+ // B: op1
1536
+ // C: op2
1537
+ //
1538
+ // This gives us NOT(OP(B, C))
1539
+
1540
+ assert (firstOpUseFlags == TernaryLogicUseFlags::AB);
1501
1541
1502
1542
std::swap (op2, op3);
1503
1543
std::swap (op1, op2);
1504
1544
1505
1545
if (isOperNot)
1506
1546
{
1507
- // We have what is actually a double not, so just return op2
1547
+ // We have NOT(XOR(B, AllBitsSet))
1548
+ // A: op3 (AllBitsSet)
1549
+ // B: op1
1550
+ // C: op2 (AllBitsSet)
1551
+ //
1552
+ // This represents a double not, so just return op2
1508
1553
// which is the only actual value now that the parameters
1509
1554
// were shifted around
1510
1555
@@ -1538,20 +1583,64 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
1538
1583
}
1539
1584
else if (isOperNot)
1540
1585
{
1541
- // A is unused, so we just want OP(NOT(B), C)
1586
+ if (firstOpUseFlags == TernaryLogicUseFlags::AB)
1587
+ {
1588
+ // We have OP(XOR(A, AllBitsSet), C)
1589
+ // A: op1
1590
+ // B: op2 (AllBitsSet)
1591
+ // C: op3
1592
+ //
1593
+ // We want A to be the unused parameter so swap it around
1594
+ // A: op2 (AllBitsSet)
1595
+ // B: op1
1596
+ // C: op3
1597
+ //
1598
+ // This gives us OP(NOT(B), C)
1542
1599
1543
- assert (op2->IsVectorAllBitsSet ());
1544
- std::swap (op1, op2);
1600
+ assert (op2->IsVectorAllBitsSet ());
1601
+ std::swap (op1, op2);
1545
1602
1546
- controlByte = static_cast <uint8_t >(~B);
1547
- controlByte = TernaryLogicInfo::GetTernaryControlByte (userOper, controlByte, C);
1603
+ controlByte = static_cast <uint8_t >(~B);
1604
+ controlByte = TernaryLogicInfo::GetTernaryControlByte (userOper, controlByte, C);
1605
+ }
1606
+ else
1607
+ {
1608
+ // We have OP(A, XOR(B, AllBitsSet))
1609
+ // A: op1
1610
+ // B: op2
1611
+ // C: op3 (AllBitsSet)
1612
+ //
1613
+ // We want A to be the unused parameter so swap it around
1614
+ // A: op3 (AllBitsSet)
1615
+ // B: op1
1616
+ // C: op2
1617
+ //
1618
+ // This gives us OP(B, NOT(C))
1619
+
1620
+ assert (firstOpUseFlags == TernaryLogicUseFlags::BC);
1621
+
1622
+ assert (op3->IsVectorAllBitsSet ());
1623
+ std::swap (op2, op3);
1624
+ std::swap (op1, op2);
1625
+
1626
+ controlByte = static_cast <uint8_t >(~C);
1627
+ controlByte = TernaryLogicInfo::GetTernaryControlByte (userOper, B, controlByte);
1628
+ }
1548
1629
}
1549
- else
1630
+ else if (firstOpUseFlags == TernaryLogicUseFlags::AB)
1550
1631
{
1551
1632
// We have OP2(OP1(A, B), C)
1552
1633
controlByte = TernaryLogicInfo::GetTernaryControlByte (oper, A, B);
1553
1634
controlByte = TernaryLogicInfo::GetTernaryControlByte (userOper, controlByte, C);
1554
1635
}
1636
+ else
1637
+ {
1638
+ // We have OP2(A, OP1(B, C))
1639
+ assert (firstOpUseFlags == TernaryLogicUseFlags::BC);
1640
+
1641
+ controlByte = TernaryLogicInfo::GetTernaryControlByte (oper, B, C);
1642
+ controlByte = TernaryLogicInfo::GetTernaryControlByte (userOper, A, controlByte);
1643
+ }
1555
1644
1556
1645
NamedIntrinsic ternaryLogicId = NI_AVX512F_TernaryLogic;
1557
1646
0 commit comments