@@ -1090,7 +1090,7 @@ class LoopVectorizationCostModel {
1090
1090
bool selectUserVectorizationFactor (ElementCount UserVF) {
1091
1091
collectUniformsAndScalars (UserVF);
1092
1092
collectInstsToScalarize (UserVF);
1093
- return expectedCost (UserVF).first . isValid ();
1093
+ return expectedCost (UserVF).isValid ();
1094
1094
}
1095
1095
1096
1096
/// \return The size (in bits) of the smallest and widest types in the code
@@ -1591,20 +1591,13 @@ class LoopVectorizationCostModel {
1591
1591
Scalars.clear ();
1592
1592
}
1593
1593
1594
- /// The vectorization cost is a combination of the cost itself and a boolean
1595
- /// indicating whether any of the contributing operations will actually
1596
- /// operate on vector values after type legalization in the backend. If this
1597
- /// latter value is false, then all operations will be scalarized (i.e. no
1598
- /// vectorization has actually taken place).
1599
- using VectorizationCostTy = std::pair<InstructionCost, bool >;
1600
-
1601
1594
/// Returns the expected execution cost. The unit of the cost does
1602
1595
/// not matter because we use the 'cost' units to compare different
1603
1596
/// vector widths. The cost that is returned is *not* normalized by
1604
1597
/// the factor width. If \p Invalid is not nullptr, this function
1605
1598
/// will add a pair(Instruction*, ElementCount) to \p Invalid for
1606
1599
/// each instruction that has an Invalid cost for the given VF.
1607
- VectorizationCostTy
1600
+ InstructionCost
1608
1601
expectedCost (ElementCount VF,
1609
1602
SmallVectorImpl<InstructionVFPair> *Invalid = nullptr );
1610
1603
@@ -4870,32 +4863,67 @@ static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
4870
4863
4871
4864
static bool willGenerateVectorInstructions (VPlan &Plan, ElementCount VF,
4872
4865
const TargetTransformInfo &TTI) {
4866
+ assert (VF.isVector () && " Checking a scalar VF?" );
4873
4867
VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType (),
4874
4868
Plan.getCanonicalIV ()->getScalarType ()->getContext ());
4875
4869
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
4876
4870
vp_depth_first_shallow (Plan.getVectorLoopRegion ()->getEntry ()))) {
4877
4871
for (VPRecipeBase &R : *VPBB) {
4878
- if (isa<VPDerivedIVRecipe, VPScalarIVStepsRecipe, VPScalarCastRecipe,
4879
- VPReplicateRecipe, VPInstruction, VPActiveLaneMaskPHIRecipe,
4880
- VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
4881
- VPVectorPointerRecipe>(&R))
4872
+ switch (R.getVPDefID ()) {
4873
+ case VPDef::VPDerivedIVSC:
4874
+ case VPDef::VPScalarIVStepsSC:
4875
+ case VPDef::VPScalarCastSC:
4876
+ case VPDef::VPReplicateSC:
4877
+ case VPDef::VPInstructionSC:
4878
+ case VPDef::VPCanonicalIVPHISC:
4879
+ case VPDef::VPVectorPointerSC:
4880
+ case VPDef::VPExpandSCEVSC:
4881
+ case VPDef::VPEVLBasedIVPHISC:
4882
+ case VPDef::VPPredInstPHISC:
4883
+ case VPDef::VPBranchOnMaskSC:
4882
4884
continue ;
4885
+ case VPDef::VPReductionSC:
4886
+ case VPDef::VPActiveLaneMaskPHISC:
4887
+ case VPDef::VPWidenCallSC:
4888
+ case VPDef::VPWidenCanonicalIVSC:
4889
+ case VPDef::VPWidenCastSC:
4890
+ case VPDef::VPWidenGEPSC:
4891
+ case VPDef::VPWidenSC:
4892
+ case VPDef::VPWidenSelectSC:
4893
+ case VPDef::VPBlendSC:
4894
+ case VPDef::VPFirstOrderRecurrencePHISC:
4895
+ case VPDef::VPWidenPHISC:
4896
+ case VPDef::VPWidenIntOrFpInductionSC:
4897
+ case VPDef::VPWidenPointerInductionSC:
4898
+ case VPDef::VPReductionPHISC:
4899
+ case VPDef::VPInterleaveSC:
4900
+ case VPDef::VPWidenLoadEVLSC:
4901
+ case VPDef::VPWidenLoadSC:
4902
+ case VPDef::VPWidenStoreEVLSC:
4903
+ case VPDef::VPWidenStoreSC:
4904
+ break ;
4905
+ default :
4906
+ llvm_unreachable (" unhandled recipe" );
4907
+ }
4883
4908
4884
4909
auto WillWiden = [&TypeInfo, &TTI, VF](VPValue *VPV) {
4885
4910
Type *ScalarTy = TypeInfo.inferScalarType (VPV);
4886
4911
Type *VectorTy = ToVectorTy (ScalarTy, VF);
4887
- unsigned NumParts = TTI.getNumberOfParts (VectorTy);
4888
- if (!NumParts )
4912
+ unsigned NumLegalParts = TTI.getNumberOfParts (VectorTy);
4913
+ if (!NumLegalParts )
4889
4914
return false ;
4890
- if (VF.isScalable ())
4915
+ if (VF.isScalable ()) {
4891
4916
// <vscale x 1 x iN> is assumed to be profitable over iN because
4892
4917
// scalable registers are a distinct register class from scalar ones.
4893
4918
// If we ever find a target which wants to lower scalable vectors
4894
4919
// back to scalars, we'll need to update this code to explicitly
4895
4920
// ask TTI about the register class uses for each part.
4896
- return NumParts <= VF.getKnownMinValue ();
4897
- else
4898
- return NumParts < VF.getKnownMinValue ();
4921
+ return NumLegalParts <= VF.getKnownMinValue ();
4922
+ }
4923
+ // Two or more parts that share a register - are vectorized.
4924
+ assert (NumLegalParts <= VF.getKnownMinValue () &&
4925
+ " More parts than elements?" );
4926
+ return NumLegalParts < VF.getKnownMinValue ();
4899
4927
};
4900
4928
SmallVector<VPValue *> VPValuesToCheck;
4901
4929
if (auto *WidenStore = dyn_cast<VPWidenStoreRecipe>(&R)) {
@@ -4915,8 +4943,7 @@ static bool willGenerateVectorInstructions(VPlan &Plan, ElementCount VF,
4915
4943
}
4916
4944
4917
4945
VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor () {
4918
- InstructionCost ExpectedCost =
4919
- CM.expectedCost (ElementCount::getFixed (1 )).first ;
4946
+ InstructionCost ExpectedCost = CM.expectedCost (ElementCount::getFixed (1 ));
4920
4947
LLVM_DEBUG (dbgs () << " LV: Scalar loop costs: " << ExpectedCost << " .\n " );
4921
4948
assert (ExpectedCost.isValid () && " Unexpected invalid cost for scalar loop" );
4922
4949
assert (any_of (VPlans,
@@ -4945,9 +4972,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4945
4972
if (VF.isScalar ())
4946
4973
continue ;
4947
4974
4948
- LoopVectorizationCostModel::VectorizationCostTy C =
4949
- CM.expectedCost (VF, &InvalidCosts);
4950
- VectorizationFactor Candidate (VF, C.first , ScalarCost.ScalarCost );
4975
+ InstructionCost C = CM.expectedCost (VF, &InvalidCosts);
4976
+ VectorizationFactor Candidate (VF, C, ScalarCost.ScalarCost );
4951
4977
4952
4978
#ifndef NDEBUG
4953
4979
unsigned AssumedMinimumVscale =
@@ -4964,7 +4990,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4964
4990
LLVM_DEBUG (dbgs () << " .\n " );
4965
4991
#endif
4966
4992
4967
- if (!willGenerateVectorInstructions (*P, VF, TTI) && !ForceVectorization ) {
4993
+ if (!ForceVectorization && ! willGenerateVectorInstructions (*P, VF, TTI)) {
4968
4994
LLVM_DEBUG (
4969
4995
dbgs ()
4970
4996
<< " LV: Not considering vector loop of width " << VF
@@ -5265,7 +5291,7 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
5265
5291
// If we did not calculate the cost for VF (because the user selected the VF)
5266
5292
// then we calculate the cost of VF here.
5267
5293
if (LoopCost == 0 ) {
5268
- LoopCost = expectedCost (VF). first ;
5294
+ LoopCost = expectedCost (VF);
5269
5295
assert (LoopCost.isValid () && " Expected to have chosen a VF with valid cost" );
5270
5296
5271
5297
// Loop body is free and there is no need for interleaving.
@@ -5887,14 +5913,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
5887
5913
return Discount;
5888
5914
}
5889
5915
5890
- LoopVectorizationCostModel::VectorizationCostTy
5891
- LoopVectorizationCostModel::expectedCost (
5916
+ InstructionCost LoopVectorizationCostModel::expectedCost (
5892
5917
ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid) {
5893
- VectorizationCostTy Cost;
5918
+ InstructionCost Cost;
5894
5919
5895
5920
// For each block.
5896
5921
for (BasicBlock *BB : TheLoop->blocks ()) {
5897
- VectorizationCostTy BlockCost;
5922
+ InstructionCost BlockCost;
5898
5923
5899
5924
// For each instruction in the old loop.
5900
5925
for (Instruction &I : BB->instructionsWithoutDebug ()) {
@@ -5913,7 +5938,7 @@ LoopVectorizationCostModel::expectedCost(
5913
5938
if (Invalid && !C.isValid ())
5914
5939
Invalid->emplace_back (&I, VF);
5915
5940
5916
- BlockCost. first += C;
5941
+ BlockCost += C;
5917
5942
LLVM_DEBUG (dbgs () << " LV: Found an estimated cost of " << C << " for VF "
5918
5943
<< VF << " For instruction: " << I << ' \n ' );
5919
5944
}
@@ -5926,10 +5951,9 @@ LoopVectorizationCostModel::expectedCost(
5926
5951
// cost by the probability of executing it. blockNeedsPredication from
5927
5952
// Legal is used so as to not include all blocks in tail folded loops.
5928
5953
if (VF.isScalar () && Legal->blockNeedsPredication (BB))
5929
- BlockCost. first /= getReciprocalPredBlockProb ();
5954
+ BlockCost /= getReciprocalPredBlockProb ();
5930
5955
5931
- Cost.first += BlockCost.first ;
5932
- Cost.second |= BlockCost.second ;
5956
+ Cost += BlockCost;
5933
5957
}
5934
5958
5935
5959
return Cost;
0 commit comments