@@ -1347,27 +1347,46 @@ class LoopVectorizationCostModel {
1347
1347
return InterleaveInfo.getInterleaveGroup (Instr);
1348
1348
}
1349
1349
1350
+ // / Calculate in advance whether a scalar epilogue is required when
1351
+ // / vectorizing and not vectorizing. If \p Invalidate is true then
1352
+ // / invalidate a previous decision.
1353
+ void collectScalarEpilogueRequirements (bool Invalidate) {
1354
+ auto NeedsScalarEpilogue = [&](bool IsVectorizing) -> bool {
1355
+ if (!isScalarEpilogueAllowed ()) {
1356
+ LLVM_DEBUG (dbgs () << " LV: Loop does not require scalar epilogue" );
1357
+ return false ;
1358
+ }
1359
+ // If we might exit from anywhere but the latch, must run the exiting
1360
+ // iteration in scalar form.
1361
+ if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch ()) {
1362
+ LLVM_DEBUG (dbgs () << " LV: Loop requires scalar epilogue: not exiting "
1363
+ " from latch block\n " );
1364
+ return true ;
1365
+ }
1366
+ if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue ()) {
1367
+ LLVM_DEBUG (dbgs () << " LV: Loop requires scalar epilogue: "
1368
+ " interleaved group requires scalar epilogue" );
1369
+ return true ;
1370
+ }
1371
+ LLVM_DEBUG (dbgs () << " LV: Loop does not require scalar epilogue" );
1372
+ return false ;
1373
+ };
1374
+
1375
+ assert ((Invalidate || !RequiresScalarEpilogue) &&
1376
+ " Already determined scalar epilogue requirements!" );
1377
+ std::pair<bool , bool > Result;
1378
+ Result.first = NeedsScalarEpilogue (true );
1379
+ LLVM_DEBUG (dbgs () << " , when vectorizing\n " );
1380
+ Result.second = NeedsScalarEpilogue (false );
1381
+ LLVM_DEBUG (dbgs () << " , when not vectorizing\n " );
1382
+ RequiresScalarEpilogue = Result;
1383
+ }
1384
+
1350
1385
// / Returns true if we're required to use a scalar epilogue for at least
1351
1386
// / the final iteration of the original loop.
1352
1387
bool requiresScalarEpilogue (bool IsVectorizing) const {
1353
- if (!isScalarEpilogueAllowed ()) {
1354
- LLVM_DEBUG (dbgs () << " LV: Loop does not require scalar epilogue\n " );
1355
- return false ;
1356
- }
1357
- // If we might exit from anywhere but the latch, must run the exiting
1358
- // iteration in scalar form.
1359
- if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch ()) {
1360
- LLVM_DEBUG (dbgs () << " LV: Loop requires scalar epilogue: not exiting "
1361
- " from latch block\n " );
1362
- return true ;
1363
- }
1364
- if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue ()) {
1365
- LLVM_DEBUG (dbgs () << " LV: Loop requires scalar epilogue: "
1366
- " interleaved group requires scalar epilogue\n " );
1367
- return true ;
1368
- }
1369
- LLVM_DEBUG (dbgs () << " LV: Loop does not require scalar epilogue\n " );
1370
- return false ;
1388
+ auto &CachedResult = *RequiresScalarEpilogue;
1389
+ return IsVectorizing ? CachedResult.first : CachedResult.second ;
1371
1390
}
1372
1391
1373
1392
// / Returns true if we're required to use a scalar epilogue for at least
@@ -1391,6 +1410,15 @@ class LoopVectorizationCostModel {
1391
1410
return ScalarEpilogueStatus == CM_ScalarEpilogueAllowed;
1392
1411
}
1393
1412
1413
+ // / Update the ScalarEpilogueStatus to a new value, potentially triggering a
1414
+ // / recalculation of the scalar epilogue requirements.
1415
+ void setScalarEpilogueStatus (ScalarEpilogueLowering Status) {
1416
+ bool Changed = ScalarEpilogueStatus != Status;
1417
+ ScalarEpilogueStatus = Status;
1418
+ if (Changed)
1419
+ collectScalarEpilogueRequirements (/* Invalidate=*/ true );
1420
+ }
1421
+
1394
1422
// / Returns the TailFoldingStyle that is best for the current loop.
1395
1423
TailFoldingStyle getTailFoldingStyle (bool IVUpdateMayOverflow = true ) const {
1396
1424
if (!ChosenTailFoldingStyle)
@@ -1771,6 +1799,9 @@ class LoopVectorizationCostModel {
1771
1799
1772
1800
// / All element types found in the loop.
1773
1801
SmallPtrSet<Type *, 16 > ElementTypesInLoop;
1802
+
1803
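+ /// Cached scalar epilogue requirements: first is the answer when vectorizing, second the answer when not vectorizing. Empty until the requirements have been collected.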
1804
+ std::optional<std::pair<bool , bool >> RequiresScalarEpilogue;
1774
1805
};
1775
1806
} // end namespace llvm
1776
1807
@@ -4058,7 +4089,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
4058
4089
if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) {
4059
4090
LLVM_DEBUG (dbgs () << " LV: Cannot fold tail by masking: vectorize with a "
4060
4091
" scalar epilogue instead.\n " );
4061
- ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
4092
+ setScalarEpilogueStatus ( CM_ScalarEpilogueAllowed) ;
4062
4093
return computeFeasibleMaxVF (MaxTC, UserVF, false );
4063
4094
}
4064
4095
return FixedScalableVFPair::getNone ();
@@ -4074,6 +4105,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
4074
4105
// Note: There is no need to invalidate any cost modeling decisions here, as
4075
4106
// none were taken so far.
4076
4107
InterleaveInfo.invalidateGroupsRequiringScalarEpilogue ();
4108
+ collectScalarEpilogueRequirements (/* Invalidate=*/ true );
4077
4109
}
4078
4110
4079
4111
FixedScalableVFPair MaxFactors = computeFeasibleMaxVF (MaxTC, UserVF, true );
@@ -4145,7 +4177,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
4145
4177
if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) {
4146
4178
LLVM_DEBUG (dbgs () << " LV: Cannot fold tail by masking: vectorize with a "
4147
4179
" scalar epilogue instead.\n " );
4148
- ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
4180
+ setScalarEpilogueStatus ( CM_ScalarEpilogueAllowed) ;
4149
4181
return MaxFactors;
4150
4182
}
4151
4183
@@ -7058,6 +7090,7 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
7058
7090
if (!OrigLoop->isInnermost ()) {
7059
7091
// If the user doesn't provide a vectorization factor, determine a
7060
7092
// reasonable one.
7093
+ CM.collectScalarEpilogueRequirements (/* Invalidate=*/ false );
7061
7094
if (UserVF.isZero ()) {
7062
7095
VF = determineVPlanVF (TTI, CM);
7063
7096
LLVM_DEBUG (dbgs () << " LV: VPlan computed VF " << VF << " .\n " );
@@ -7102,6 +7135,7 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
7102
7135
7103
7136
void LoopVectorizationPlanner::plan (ElementCount UserVF, unsigned UserIC) {
7104
7137
assert (OrigLoop->isInnermost () && " Inner loop expected." );
7138
+ CM.collectScalarEpilogueRequirements (/* Invalidate=*/ false );
7105
7139
CM.collectValuesToIgnore ();
7106
7140
CM.collectElementTypesForWidening ();
7107
7141
@@ -7116,11 +7150,13 @@ void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
7116
7150
dbgs ()
7117
7151
<< " LV: Invalidate all interleaved groups due to fold-tail by masking "
7118
7152
" which requires masked-interleaved support.\n " );
7119
- if (CM.InterleaveInfo .invalidateGroups ())
7153
+ if (CM.InterleaveInfo .invalidateGroups ()) {
7120
7154
// Invalidating interleave groups also requires invalidating all decisions
7121
7155
// based on them, which includes widening decisions and uniform and scalar
7122
7156
// values.
7123
7157
CM.invalidateCostModelingDecisions ();
7158
+ CM.collectScalarEpilogueRequirements (/* Invalidate=*/ true );
7159
+ }
7124
7160
}
7125
7161
7126
7162
if (CM.foldTailByMasking ())
0 commit comments