@@ -260,20 +260,6 @@ static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
260
260
VF * getNumElements(ScalarTy));
261
261
}
262
262
263
- /// Returns the number of elements of the given type \p Ty, not less than \p Sz,
264
- /// which forms a type that \p TTI splits into whole vector types during
265
- /// legalization. Yields bit_ceil(\p Sz) when \p Ty is not a valid element type or when the reported number of parts is 0 or at least \p Sz.
266
- static unsigned getFullVectorNumberOfElements(const TargetTransformInfo &TTI,
267
- Type *Ty, unsigned Sz) {
268
- if (!isValidElementType(Ty))
269
- return bit_ceil(Sz);
270
- // Find the number of elements that forms full vectors: round the per-part element count up to a power of 2 and multiply back by the number of parts.
271
- const unsigned NumParts = TTI.getNumberOfParts(getWidenedType(Ty, Sz));
272
- if (NumParts == 0 || NumParts >= Sz)
273
- return bit_ceil(Sz);
274
- return bit_ceil(divideCeil(Sz, NumParts)) * NumParts;
275
- }
276
-
277
263
static void transformScalarShuffleIndiciesToVector(unsigned VecTyNumElements,
278
264
SmallVectorImpl<int> &Mask) {
279
265
// The ShuffleBuilder implementation uses shufflevector to splat an "element".
@@ -408,7 +394,7 @@ static bool isVectorLikeInstWithConstOps(Value *V) {
408
394
/// total number of elements \p Size and number of registers (parts) \p
409
395
/// NumParts.
410
396
static unsigned getPartNumElems(unsigned Size, unsigned NumParts) {
411
- return std::min<unsigned>(Size, bit_ceil( divideCeil(Size, NumParts) ));
397
+ return PowerOf2Ceil( divideCeil(Size, NumParts));
412
398
}
413
399
414
400
/// Returns correct remaining number of elements, considering total amount \p
@@ -1236,22 +1222,6 @@ static bool doesNotNeedToSchedule(ArrayRef<Value *> VL) {
1236
1222
(all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts));
1237
1223
}
1238
1224
1239
- /// Returns true if the widened type of \p Ty elements with size \p Sz represents
1240
- /// a full vector type, i.e. adding an extra element results in extra parts upon type
1241
- /// legalization. Any power-of-2 \p Sz greater than 1 is also accepted.
1242
- static bool hasFullVectorsOrPowerOf2(const TargetTransformInfo &TTI, Type *Ty,
1243
- unsigned Sz) {
1244
- if (Sz <= 1)
1245
- return false;
1246
- if (!isValidElementType(Ty) && !isa<FixedVectorType>(Ty))
1247
- return false;
1248
- if (has_single_bit(Sz))
1249
- return true;
1250
- const unsigned NumParts = TTI.getNumberOfParts(getWidenedType(Ty, Sz));
1251
- return NumParts > 0 && NumParts < Sz && has_single_bit(Sz / NumParts) &&
1252
- Sz % NumParts == 0;
1253
- }
1254
-
1255
1225
namespace slpvectorizer {
1256
1226
1257
1227
/// Bottom Up SLP Vectorizer.
@@ -3341,15 +3311,6 @@ class BoUpSLP {
3341
3311
/// Return true if this is a non-power-of-2 node.
3342
3312
bool isNonPowOf2Vec() const {
3343
3313
bool IsNonPowerOf2 = !has_single_bit(Scalars.size());
3344
- return IsNonPowerOf2;
3345
- }
3346
-
3347
- /// Return true if this is a node that tries to vectorize a number of
3348
- /// elements, forming whole vectors.
3349
- bool
3350
- hasNonWholeRegisterOrNonPowerOf2Vec(const TargetTransformInfo &TTI) const {
3351
- bool IsNonPowerOf2 = !hasFullVectorsOrPowerOf2(
3352
- TTI, getValueType(Scalars.front()), Scalars.size());
3353
3314
assert((!IsNonPowerOf2 || ReuseShuffleIndices.empty()) &&
3354
3315
"Reshuffling not supported with non-power-of-2 vectors yet.");
3355
3316
return IsNonPowerOf2;
@@ -3469,10 +3430,8 @@ class BoUpSLP {
3469
3430
Last->State = EntryState;
3470
3431
// FIXME: Remove once support for ReuseShuffleIndices has been implemented
3471
3432
// for non-power-of-two vectors.
3472
- assert(
3473
- (hasFullVectorsOrPowerOf2(*TTI, getValueType(VL.front()), VL.size()) ||
3474
- ReuseShuffleIndices.empty()) &&
3475
- "Reshuffling scalars not yet supported for nodes with padding");
3433
+ assert((has_single_bit(VL.size()) || ReuseShuffleIndices.empty()) &&
3434
+ "Reshuffling scalars not yet supported for nodes with padding");
3476
3435
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
3477
3436
ReuseShuffleIndices.end());
3478
3437
if (ReorderIndices.empty()) {
@@ -5310,7 +5269,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
5310
5269
// node.
5311
5270
if (!TE.ReuseShuffleIndices.empty()) {
5312
5271
// FIXME: Support ReuseShuffleIndices for non-power-of-two vectors.
5313
- assert(!TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI ) &&
5272
+ assert(!TE.isNonPowOf2Vec( ) &&
5314
5273
"Reshuffling scalars not yet supported for nodes with padding");
5315
5274
5316
5275
if (isSplat(TE.Scalars))
@@ -5550,7 +5509,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
5550
5509
}
5551
5510
// FIXME: Remove the non-power-of-two check once findReusedOrderedScalars
5552
5511
// has been audited for correctness with non-power-of-two vectors.
5553
- if (!TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI ))
5512
+ if (!TE.isNonPowOf2Vec( ))
5554
5513
if (std::optional<OrdersType> CurrentOrder = findReusedOrderedScalars(TE))
5555
5514
return CurrentOrder;
5556
5515
}
@@ -5703,18 +5662,15 @@ void BoUpSLP::reorderTopToBottom() {
5703
5662
});
5704
5663
5705
5664
// Reorder the graph nodes according to their vectorization factor.
5706
- for (unsigned VF = VectorizableTree.front()->getVectorFactor();
5707
- !VFToOrderedEntries.empty() && VF > 1; VF -= 2 - (VF & 1U) ) {
5665
+ for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1;
5666
+ VF = bit_ceil (VF) / 2 ) {
5708
5667
auto It = VFToOrderedEntries.find(VF);
5709
5668
if (It == VFToOrderedEntries.end())
5710
5669
continue;
5711
5670
// Try to find the most profitable order. We are just looking for the most
5712
5671
// used order and reorder scalar elements in the nodes according to this
5713
5672
// mostly used order.
5714
5673
ArrayRef<TreeEntry *> OrderedEntries = It->second.getArrayRef();
5715
- // Delete VF entry upon exit.
5716
- auto Cleanup = make_scope_exit([&]() { VFToOrderedEntries.erase(It); });
5717
-
5718
5674
// All operands are reordered and used only in this node - propagate the
5719
5675
// most used order to the user node.
5720
5676
MapVector<OrdersType, unsigned,
@@ -7573,36 +7529,33 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
7573
7529
UniqueValues.emplace_back(V);
7574
7530
}
7575
7531
size_t NumUniqueScalarValues = UniqueValues.size();
7576
- bool IsFullVectors = hasFullVectorsOrPowerOf2(
7577
- *TTI, UniqueValues.front()->getType(), NumUniqueScalarValues);
7578
- if (NumUniqueScalarValues == VL.size() &&
7579
- (VectorizeNonPowerOf2 || IsFullVectors)) {
7532
+ if (NumUniqueScalarValues == VL.size()) {
7580
7533
ReuseShuffleIndices.clear();
7581
7534
} else {
7582
7535
// FIXME: Reshuffling scalars is not supported yet for non-power-of-2 ops.
7583
- if ((UserTreeIdx.UserTE &&
7584
- UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(*TTI)) ||
7585
- !has_single_bit(VL.size())) {
7536
+ if ((UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) ||
7537
+ !llvm::has_single_bit(VL.size())) {
7586
7538
LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
7587
7539
"for nodes with padding.\n");
7588
7540
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
7589
7541
return false;
7590
7542
}
7591
7543
LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
7592
- if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
7593
- (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
7594
- return isa<UndefValue>(V) || !isConstant(V);
7595
- }))) {
7544
+ if (NumUniqueScalarValues <= 1 ||
7545
+ (UniquePositions.size() == 1 && all_of(UniqueValues,
7546
+ [](Value *V) {
7547
+ return isa<UndefValue>(V) ||
7548
+ !isConstant(V);
7549
+ })) ||
7550
+ !llvm::has_single_bit<uint32_t>(NumUniqueScalarValues)) {
7596
7551
if (DoNotFail && UniquePositions.size() > 1 &&
7597
7552
NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() &&
7598
7553
all_of(UniqueValues, [=](Value *V) {
7599
7554
return isa<ExtractElementInst>(V) ||
7600
7555
areAllUsersVectorized(cast<Instruction>(V),
7601
7556
UserIgnoreList);
7602
7557
})) {
7603
- // Find the number of elements that forms full vectors.
7604
- unsigned PWSz = getFullVectorNumberOfElements(
7605
- *TTI, UniqueValues.front()->getType(), UniqueValues.size());
7558
+ unsigned PWSz = PowerOf2Ceil(UniqueValues.size());
7606
7559
if (PWSz == VL.size()) {
7607
7560
ReuseShuffleIndices.clear();
7608
7561
} else {
@@ -9840,6 +9793,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
9840
9793
return nullptr;
9841
9794
Value *VecBase = nullptr;
9842
9795
ArrayRef<Value *> VL = E->Scalars;
9796
+ // If the resulting type is scalarized, do not adjust the cost.
9797
+ if (NumParts == VL.size())
9798
+ return nullptr;
9843
9799
// Check if it can be considered reused if same extractelements were
9844
9800
// vectorized already.
9845
9801
bool PrevNodeFound = any_of(
@@ -10494,7 +10450,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
10494
10450
InsertMask[Idx] = I + 1;
10495
10451
}
10496
10452
unsigned VecScalarsSz = PowerOf2Ceil(NumElts);
10497
- if (NumOfParts > 0 && NumOfParts < NumElts )
10453
+ if (NumOfParts > 0)
10498
10454
VecScalarsSz = PowerOf2Ceil((NumElts + NumOfParts - 1) / NumOfParts);
10499
10455
unsigned VecSz = (1 + OffsetEnd / VecScalarsSz - OffsetBeg / VecScalarsSz) *
10500
10456
VecScalarsSz;
@@ -17829,7 +17785,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
17829
17785
for (unsigned I = NextInst; I < MaxInst; ++I) {
17830
17786
unsigned ActualVF = std::min(MaxInst - I, VF);
17831
17787
17832
- if (!hasFullVectorsOrPowerOf2(*TTI, ScalarTy, ActualVF))
17788
+ if (!has_single_bit( ActualVF))
17833
17789
continue;
17834
17790
17835
17791
if (MaxVFOnly && ActualVF < MaxVF)
0 commit comments