Skip to content

Commit 0789f5d

Browse files
committed
!fixup address latest comments, thanks!
1 parent 82f5e6a commit 0789f5d

File tree

3 files changed

+71
-44
lines changed

3 files changed

+71
-44
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ class VPBuilder {
226226

227227
/// TODO: The following VectorizationFactor was pulled out of
228228
/// LoopVectorizationCostModel class. LV also deals with
229-
/// VectorizerParams::VectorizationFactor and VectorizationCostTy.
229+
/// VectorizerParams::VectorizationFactor.
230230
/// We need to streamline them.
231231

232232
/// Information about vectorization costs.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 58 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,7 +1090,7 @@ class LoopVectorizationCostModel {
10901090
bool selectUserVectorizationFactor(ElementCount UserVF) {
10911091
collectUniformsAndScalars(UserVF);
10921092
collectInstsToScalarize(UserVF);
1093-
return expectedCost(UserVF).first.isValid();
1093+
return expectedCost(UserVF).isValid();
10941094
}
10951095

10961096
/// \return The size (in bits) of the smallest and widest types in the code
@@ -1591,20 +1591,13 @@ class LoopVectorizationCostModel {
15911591
Scalars.clear();
15921592
}
15931593

1594-
/// The vectorization cost is a combination of the cost itself and a boolean
1595-
/// indicating whether any of the contributing operations will actually
1596-
/// operate on vector values after type legalization in the backend. If this
1597-
/// latter value is false, then all operations will be scalarized (i.e. no
1598-
/// vectorization has actually taken place).
1599-
using VectorizationCostTy = std::pair<InstructionCost, bool>;
1600-
16011594
/// Returns the expected execution cost. The unit of the cost does
16021595
/// not matter because we use the 'cost' units to compare different
16031596
/// vector widths. The cost that is returned is *not* normalized by
16041597
/// the factor width. If \p Invalid is not nullptr, this function
16051598
/// will add a pair(Instruction*, ElementCount) to \p Invalid for
16061599
/// each instruction that has an Invalid cost for the given VF.
1607-
VectorizationCostTy
1600+
InstructionCost
16081601
expectedCost(ElementCount VF,
16091602
SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
16101603

@@ -4870,32 +4863,67 @@ static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
48704863

48714864
static bool willGenerateVectorInstructions(VPlan &Plan, ElementCount VF,
48724865
const TargetTransformInfo &TTI) {
4866+
assert(VF.isVector() && "Checking a scalar VF?");
48734867
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(),
48744868
Plan.getCanonicalIV()->getScalarType()->getContext());
48754869
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
48764870
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
48774871
for (VPRecipeBase &R : *VPBB) {
4878-
if (isa<VPDerivedIVRecipe, VPScalarIVStepsRecipe, VPScalarCastRecipe,
4879-
VPReplicateRecipe, VPInstruction, VPActiveLaneMaskPHIRecipe,
4880-
VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
4881-
VPVectorPointerRecipe>(&R))
4872+
switch (R.getVPDefID()) {
4873+
case VPDef::VPDerivedIVSC:
4874+
case VPDef::VPScalarIVStepsSC:
4875+
case VPDef::VPScalarCastSC:
4876+
case VPDef::VPReplicateSC:
4877+
case VPDef::VPInstructionSC:
4878+
case VPDef::VPCanonicalIVPHISC:
4879+
case VPDef::VPVectorPointerSC:
4880+
case VPDef::VPExpandSCEVSC:
4881+
case VPDef::VPEVLBasedIVPHISC:
4882+
case VPDef::VPPredInstPHISC:
4883+
case VPDef::VPBranchOnMaskSC:
48824884
continue;
4885+
case VPDef::VPReductionSC:
4886+
case VPDef::VPActiveLaneMaskPHISC:
4887+
case VPDef::VPWidenCallSC:
4888+
case VPDef::VPWidenCanonicalIVSC:
4889+
case VPDef::VPWidenCastSC:
4890+
case VPDef::VPWidenGEPSC:
4891+
case VPDef::VPWidenSC:
4892+
case VPDef::VPWidenSelectSC:
4893+
case VPDef::VPBlendSC:
4894+
case VPDef::VPFirstOrderRecurrencePHISC:
4895+
case VPDef::VPWidenPHISC:
4896+
case VPDef::VPWidenIntOrFpInductionSC:
4897+
case VPDef::VPWidenPointerInductionSC:
4898+
case VPDef::VPReductionPHISC:
4899+
case VPDef::VPInterleaveSC:
4900+
case VPDef::VPWidenLoadEVLSC:
4901+
case VPDef::VPWidenLoadSC:
4902+
case VPDef::VPWidenStoreEVLSC:
4903+
case VPDef::VPWidenStoreSC:
4904+
break;
4905+
default:
4906+
llvm_unreachable("unhandled recipe");
4907+
}
48834908

48844909
auto WillWiden = [&TypeInfo, &TTI, VF](VPValue *VPV) {
48854910
Type *ScalarTy = TypeInfo.inferScalarType(VPV);
48864911
Type *VectorTy = ToVectorTy(ScalarTy, VF);
4887-
unsigned NumParts = TTI.getNumberOfParts(VectorTy);
4888-
if (!NumParts)
4912+
unsigned NumLegalParts = TTI.getNumberOfParts(VectorTy);
4913+
if (!NumLegalParts)
48894914
return false;
4890-
if (VF.isScalable())
4915+
if (VF.isScalable()) {
48914916
// <vscale x 1 x iN> is assumed to be profitable over iN because
48924917
// scalable registers are a distinct register class from scalar ones.
48934918
// If we ever find a target which wants to lower scalable vectors
48944919
// back to scalars, we'll need to update this code to explicitly
48954920
// ask TTI which register class each part uses.
4896-
return NumParts <= VF.getKnownMinValue();
4897-
else
4898-
return NumParts < VF.getKnownMinValue();
4921+
return NumLegalParts <= VF.getKnownMinValue();
4922+
}
4923+
// Two or more elements that share a register (part) are vectorized.
4924+
assert(NumLegalParts <= VF.getKnownMinValue() &&
4925+
"More parts than elements?");
4926+
return NumLegalParts < VF.getKnownMinValue();
48994927
};
49004928
SmallVector<VPValue *> VPValuesToCheck;
49014929
if (auto *WidenStore = dyn_cast<VPWidenStoreRecipe>(&R)) {
@@ -4915,8 +4943,7 @@ static bool willGenerateVectorInstructions(VPlan &Plan, ElementCount VF,
49154943
}
49164944

49174945
VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4918-
InstructionCost ExpectedCost =
4919-
CM.expectedCost(ElementCount::getFixed(1)).first;
4946+
InstructionCost ExpectedCost = CM.expectedCost(ElementCount::getFixed(1));
49204947
LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n");
49214948
assert(ExpectedCost.isValid() && "Unexpected invalid cost for scalar loop");
49224949
assert(any_of(VPlans,
@@ -4945,9 +4972,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
49454972
if (VF.isScalar())
49464973
continue;
49474974

4948-
LoopVectorizationCostModel::VectorizationCostTy C =
4949-
CM.expectedCost(VF, &InvalidCosts);
4950-
VectorizationFactor Candidate(VF, C.first, ScalarCost.ScalarCost);
4975+
InstructionCost C = CM.expectedCost(VF, &InvalidCosts);
4976+
VectorizationFactor Candidate(VF, C, ScalarCost.ScalarCost);
49514977

49524978
#ifndef NDEBUG
49534979
unsigned AssumedMinimumVscale =
@@ -4964,7 +4990,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
49644990
LLVM_DEBUG(dbgs() << ".\n");
49654991
#endif
49664992

4967-
if (!willGenerateVectorInstructions(*P, VF, TTI) && !ForceVectorization) {
4993+
if (!ForceVectorization && !willGenerateVectorInstructions(*P, VF, TTI)) {
49684994
LLVM_DEBUG(
49694995
dbgs()
49704996
<< "LV: Not considering vector loop of width " << VF
@@ -5265,7 +5291,7 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
52655291
// If we did not calculate the cost for VF (because the user selected the VF)
52665292
// then we calculate the cost of VF here.
52675293
if (LoopCost == 0) {
5268-
LoopCost = expectedCost(VF).first;
5294+
LoopCost = expectedCost(VF);
52695295
assert(LoopCost.isValid() && "Expected to have chosen a VF with valid cost");
52705296

52715297
// Loop body is free and there is no need for interleaving.
@@ -5887,14 +5913,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
58875913
return Discount;
58885914
}
58895915

5890-
LoopVectorizationCostModel::VectorizationCostTy
5891-
LoopVectorizationCostModel::expectedCost(
5916+
InstructionCost LoopVectorizationCostModel::expectedCost(
58925917
ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid) {
5893-
VectorizationCostTy Cost;
5918+
InstructionCost Cost;
58945919

58955920
// For each block.
58965921
for (BasicBlock *BB : TheLoop->blocks()) {
5897-
VectorizationCostTy BlockCost;
5922+
InstructionCost BlockCost;
58985923

58995924
// For each instruction in the old loop.
59005925
for (Instruction &I : BB->instructionsWithoutDebug()) {
@@ -5913,7 +5938,7 @@ LoopVectorizationCostModel::expectedCost(
59135938
if (Invalid && !C.isValid())
59145939
Invalid->emplace_back(&I, VF);
59155940

5916-
BlockCost.first += C;
5941+
BlockCost += C;
59175942
LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF "
59185943
<< VF << " For instruction: " << I << '\n');
59195944
}
@@ -5926,10 +5951,9 @@ LoopVectorizationCostModel::expectedCost(
59265951
// cost by the probability of executing it. blockNeedsPredication from
59275952
// Legal is used so as to not include all blocks in tail folded loops.
59285953
if (VF.isScalar() && Legal->blockNeedsPredication(BB))
5929-
BlockCost.first /= getReciprocalPredBlockProb();
5954+
BlockCost /= getReciprocalPredBlockProb();
59305955

5931-
Cost.first += BlockCost.first;
5932-
Cost.second |= BlockCost.second;
5956+
Cost += BlockCost;
59335957
}
59345958

59355959
return Cost;

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
5454
return ResTy;
5555
}
5656
case Instruction::ICmp:
57+
case VPInstruction::ActiveLaneMask:
58+
return inferScalarType(R->getOperand(1));
5759
case VPInstruction::FirstOrderRecurrenceSplice:
5860
case VPInstruction::Not:
5961
return SetResultTyFromOp();
@@ -240,15 +242,16 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
240242

241243
Type *ResultTy =
242244
TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
243-
.Case<VPCanonicalIVPHIRecipe, VPFirstOrderRecurrencePHIRecipe,
244-
VPReductionPHIRecipe, VPWidenPointerInductionRecipe,
245-
VPEVLBasedIVPHIRecipe>([this](const auto *R) {
246-
// Handle header phi recipes, except VPWidenIntOrFpInduction
247-
// which needs special handling due to it being possibly truncated.
248-
// TODO: consider inferring/caching type of siblings, e.g.,
249-
// backedge value, here and in cases below.
250-
return inferScalarType(R->getStartValue());
251-
})
245+
.Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
246+
VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
247+
VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
248+
[this](const auto *R) {
249+
// Handle header phi recipes, except VPWidenIntOrFpInduction
250+
// which needs special handling due to it being possibly truncated.
251+
// TODO: consider inferring/caching type of siblings, e.g.,
252+
// backedge value, here and in cases below.
253+
return inferScalarType(R->getStartValue());
254+
})
252255
.Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
253256
[](const auto *R) { return R->getScalarType(); })
254257
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,

0 commit comments

Comments
 (0)