1717#include " llvm/ADT/SmallSet.h"
1818#include " llvm/ADT/SmallVector.h"
1919#include " llvm/ADT/Statistic.h"
20+ #include " llvm/ADT/StringMap.h"
2021#include " llvm/ADT/StringRef.h"
21- #include " llvm/ADT/StringSet.h"
2222#include " llvm/Analysis/DependenceAnalysis.h"
2323#include " llvm/Analysis/LoopCacheAnalysis.h"
2424#include " llvm/Analysis/LoopInfo.h"
@@ -80,6 +80,21 @@ enum class RuleTy {
8080 ForVectorization,
8181};
8282
83+ // / Store the information about if corresponding direction vector was negated
84+ // / by normalization or not. This is necessary to restore the original one from
85+ // / a row of a dependency matrix because we only manage normalized direction
86+ // / vectors. Also, duplicate vectors are eliminated, so there may be both
87+ // / original and negated vectors for a single entry (a row of dependency
88+ // / matrix). E.g., if there are two direction vectors `[< =]` and `[> =]`, the
89+ // / later one will be converted to the same as former one by normalization, so
90+ // / only `[< =]` would be retained in the final result.
91+ struct NegatedStatus {
92+ bool Original = false ;
93+ bool Negated = false ;
94+
95+ bool isNonNegativeDir (char Dir) const ;
96+ };
97+
8398} // end anonymous namespace
8499
85100// Minimum loop depth supported.
@@ -126,9 +141,10 @@ static void printDepMatrix(CharMatrix &DepMatrix) {
126141}
127142#endif
128143
129- static bool populateDependencyMatrix (CharMatrix &DepMatrix, unsigned Level,
130- Loop *L, DependenceInfo *DI,
131- ScalarEvolution *SE,
144+ static bool populateDependencyMatrix (CharMatrix &DepMatrix,
145+ std::vector<NegatedStatus> &NegStatusVec,
146+ unsigned Level, Loop *L,
147+ DependenceInfo *DI, ScalarEvolution *SE,
132148 OptimizationRemarkEmitter *ORE) {
133149 using ValueVector = SmallVector<Value *, 16 >;
134150
@@ -167,7 +183,9 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
167183 return false ;
168184 }
169185 ValueVector::iterator I, IE, J, JE;
170- StringSet<> Seen;
186+
187+ // Manage all found direction vectors. and map it to the index of DepMatrix.
188+ StringMap<unsigned > Seen;
171189
172190 for (I = MemInstr.begin (), IE = MemInstr.end (); I != IE; ++I) {
173191 for (J = I, JE = MemInstr.end (); J != JE; ++J) {
@@ -182,7 +200,8 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
182200 assert (D->isOrdered () && " Expected an output, flow or anti dep." );
183201 // If the direction vector is negative, normalize it to
184202 // make it non-negative.
185- if (D->normalize (SE))
203+ bool Normalized = D->normalize (SE);
204+ if (Normalized)
186205 LLVM_DEBUG (dbgs () << " Negative dependence vector normalized.\n " );
187206 LLVM_DEBUG (StringRef DepType =
188207 D->isFlow () ? " flow" : D->isAnti () ? " anti" : " output" ;
@@ -214,8 +233,17 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
214233 }
215234
216235 // Make sure we only add unique entries to the dependency matrix.
217- if (Seen.insert (StringRef (Dep.data (), Dep.size ())).second )
236+ unsigned Index = DepMatrix.size ();
237+ auto [Ite, Inserted] =
238+ Seen.try_emplace (StringRef (Dep.data (), Dep.size ()), Index);
239+ if (Inserted) {
218240 DepMatrix.push_back (Dep);
241+ NegStatusVec.push_back (NegatedStatus{});
242+ } else
243+ Index = Ite->second ;
244+
245+ NegatedStatus &Status = NegStatusVec[Index];
246+ (Normalized ? Status.Negated : Status.Original ) = true ;
219247 }
220248 }
221249 }
@@ -400,6 +428,7 @@ class LoopInterchangeProfitability {
400428 bool isProfitable (const Loop *InnerLoop, const Loop *OuterLoop,
401429 unsigned InnerLoopId, unsigned OuterLoopId,
402430 CharMatrix &DepMatrix,
431+ const std::vector<NegatedStatus> &NegStatusVec,
403432 const DenseMap<const Loop *, unsigned > &CostMap,
404433 std::unique_ptr<CacheCost> &CC);
405434
@@ -409,9 +438,10 @@ class LoopInterchangeProfitability {
409438 const DenseMap<const Loop *, unsigned > &CostMap,
410439 std::unique_ptr<CacheCost> &CC);
411440 std::optional<bool > isProfitablePerInstrOrderCost ();
412- std::optional<bool > isProfitableForVectorization (unsigned InnerLoopId,
413- unsigned OuterLoopId,
414- CharMatrix &DepMatrix);
441+ std::optional<bool >
442+ isProfitableForVectorization (unsigned InnerLoopId, unsigned OuterLoopId,
443+ CharMatrix &DepMatrix,
444+ const std::vector<NegatedStatus> &NegStatusVec);
415445 Loop *OuterLoop;
416446 Loop *InnerLoop;
417447
@@ -503,8 +533,9 @@ struct LoopInterchange {
503533 << " \n " );
504534
505535 CharMatrix DependencyMatrix;
536+ std::vector<NegatedStatus> NegStatusVec;
506537 Loop *OuterMostLoop = *(LoopList.begin ());
507- if (!populateDependencyMatrix (DependencyMatrix, LoopNestDepth,
538+ if (!populateDependencyMatrix (DependencyMatrix, NegStatusVec, LoopNestDepth,
508539 OuterMostLoop, DI, SE, ORE)) {
509540 LLVM_DEBUG (dbgs () << " Populating dependency matrix failed\n " );
510541 return false ;
@@ -543,8 +574,8 @@ struct LoopInterchange {
543574 for (unsigned j = SelecLoopId; j > 0 ; j--) {
544575 bool ChangedPerIter = false ;
545576 for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
546- bool Interchanged =
547- processLoop (LoopList, i, i - 1 , DependencyMatrix , CostMap);
577+ bool Interchanged = processLoop (LoopList, i, i - 1 , DependencyMatrix,
578+ NegStatusVec , CostMap);
548579 ChangedPerIter |= Interchanged;
549580 Changed |= Interchanged;
550581 }
@@ -559,6 +590,8 @@ struct LoopInterchange {
559590 bool processLoop (SmallVectorImpl<Loop *> &LoopList, unsigned InnerLoopId,
560591 unsigned OuterLoopId,
561592 std::vector<std::vector<char >> &DependencyMatrix,
593+
594+ const std::vector<NegatedStatus> &NegStatusVec,
562595 const DenseMap<const Loop *, unsigned > &CostMap) {
563596 Loop *OuterLoop = LoopList[OuterLoopId];
564597 Loop *InnerLoop = LoopList[InnerLoopId];
@@ -572,7 +605,7 @@ struct LoopInterchange {
572605 LLVM_DEBUG (dbgs () << " Loops are legal to interchange\n " );
573606 LoopInterchangeProfitability LIP (OuterLoop, InnerLoop, SE, ORE);
574607 if (!LIP.isProfitable (InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
575- DependencyMatrix, CostMap, CC)) {
608+ DependencyMatrix, NegStatusVec, CostMap, CC)) {
576609 LLVM_DEBUG (dbgs () << " Interchanging loops not profitable.\n " );
577610 return false ;
578611 }
@@ -1197,27 +1230,71 @@ LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
11971230 return std::nullopt ;
11981231}
11991232
1233+ static char flipDirection (char Dir) {
1234+ switch (Dir) {
1235+ case ' <' :
1236+ return ' >' ;
1237+ case ' >' :
1238+ return ' <' ;
1239+ case ' =' :
1240+ case ' I' :
1241+ case ' *' :
1242+ return Dir;
1243+ default :
1244+ llvm_unreachable (" Unknown direction" );
1245+ }
1246+ }
1247+
1248+ // / Ensure that there are no negative direction dependencies corresponding to \p
1249+ // / Dir.
1250+ bool NegatedStatus::isNonNegativeDir (char Dir) const {
1251+ assert ((Original || Negated) && " Cannot restore the original direction" );
1252+
1253+ // If both flag is true, it means that there is both as-is and negated
1254+ // direction. In this case only `=` or `I` don't have negative direction
1255+ // dependency.
1256+ if (Original && Negated)
1257+ return Dir == ' =' || Dir == ' I' ;
1258+
1259+ char Restored = Negated ? flipDirection (Dir) : Dir;
1260+ return Restored == ' =' || Restored == ' I' || Restored == ' <' ;
1261+ }
1262+
12001263// / Return true if we can vectorize the loop specified by \p LoopId.
1201- static bool canVectorize (const CharMatrix &DepMatrix, unsigned LoopId) {
1264+ static bool canVectorize (const CharMatrix &DepMatrix,
1265+ const std::vector<NegatedStatus> &NegStatusVec,
1266+ unsigned LoopId) {
1267+ // The loop can be vectorized if there are no negative dependencies. Consider
1268+ // the dependency of `j` in the following example.
1269+ //
1270+ // Positive: ... = A[i][j] Negative: ... = A[i][j-1]
1271+ // A[i][j-1] = ... A[i][j] = ...
1272+ //
1273+ // In the right case, vectorizing the loop can change the loaded value from
1274+ // `A[i][j-1]`. At the moment we don't take into account the distance of the
1275+ // dependency and vector width.
1276+ // TODO: Considering the dependency distance and the vector width can give a
1277+ // more accurate result. For example, the following loop can be vectorized if
1278+ // the vector width is less than or equal to 4 x sizeof(A[0][0]).
12021279 for (unsigned I = 0 ; I != DepMatrix.size (); I++) {
12031280 char Dir = DepMatrix[I][LoopId];
1204- if (Dir != ' I ' && Dir != ' = ' )
1281+ if (!NegStatusVec[I]. isNonNegativeDir ( Dir) )
12051282 return false ;
12061283 }
12071284 return true ;
12081285}
12091286
12101287std::optional<bool > LoopInterchangeProfitability::isProfitableForVectorization (
1211- unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
1212- // If the outer loop is not loop independent it is not profitable to move
1213- // this to inner position, since doing so would not enable inner loop
1214- // parallelism .
1215- if (!canVectorize (DepMatrix, OuterLoopId))
1288+ unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix,
1289+ const std::vector<NegatedStatus> &NegStatusVec) {
1290+ // If the outer loop cannot be vectorized, it is not profitable to move this
1291+ // to inner position .
1292+ if (!canVectorize (DepMatrix, NegStatusVec, OuterLoopId))
12161293 return false ;
12171294
1218- // If inner loop has dependence and outer loop is loop independent then it is
1295+ // If inner loop cannot be vectorized and outer loop can be then it is
12191296 // profitable to interchange to enable inner loop parallelism.
1220- if (!canVectorize (DepMatrix, InnerLoopId))
1297+ if (!canVectorize (DepMatrix, NegStatusVec, InnerLoopId))
12211298 return true ;
12221299
12231300 // TODO: Estimate the cost of vectorized loop body when both the outer and the
@@ -1228,6 +1305,7 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
12281305bool LoopInterchangeProfitability::isProfitable (
12291306 const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
12301307 unsigned OuterLoopId, CharMatrix &DepMatrix,
1308+ const std::vector<NegatedStatus> &NegStatusVec,
12311309 const DenseMap<const Loop *, unsigned > &CostMap,
12321310 std::unique_ptr<CacheCost> &CC) {
12331311 // isProfitable() is structured to avoid endless loop interchange. If the
@@ -1249,8 +1327,8 @@ bool LoopInterchangeProfitability::isProfitable(
12491327 shouldInterchange = isProfitablePerInstrOrderCost ();
12501328 break ;
12511329 case RuleTy::ForVectorization:
1252- shouldInterchange =
1253- isProfitableForVectorization (InnerLoopId, OuterLoopId, DepMatrix);
1330+ shouldInterchange = isProfitableForVectorization (InnerLoopId, OuterLoopId,
1331+ DepMatrix, NegStatusVec );
12541332 break ;
12551333 }
12561334
0 commit comments