9
9
#include " flang/Optimizer/Dialect/FIROps.h"
10
10
#include " flang/Optimizer/OpenMP/Passes.h"
11
11
#include " flang/Optimizer/OpenMP/Utils.h"
12
+ #include " mlir/Analysis/SliceAnalysis.h"
12
13
#include " mlir/Dialect/OpenMP/OpenMPDialect.h"
13
14
#include " mlir/Transforms/DialectConversion.h"
15
+ #include " mlir/Transforms/RegionUtils.h"
14
16
15
17
namespace flangomp {
16
18
#define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS
@@ -21,6 +23,106 @@ namespace flangomp {
21
23
#define DBGS () (llvm::dbgs() << " [" DEBUG_TYPE << " ]: " )
22
24
23
25
namespace {
26
+ namespace looputils {
27
+ using LoopNest = llvm::SetVector<fir::DoLoopOp>; // Insertion-ordered, duplicate-free collection of the loops forming a nest.
28
+
29
+ // / Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff
30
+ // / there are no operations in \p outerloop's body other than:
31
+ // /
32
+ // / 1. the operations needed to assing/update \p outerLoop's induction variable.
33
+ // / 2. \p innerLoop itself.
34
+ // /
35
+ // / \p return true if \p innerLoop is perfectly nested inside \p outerLoop
36
+ // / according to the above definition.
37
+ bool isPerfectlyNested (fir::DoLoopOp outerLoop, fir::DoLoopOp innerLoop) {
38
+ mlir::ForwardSliceOptions forwardSliceOptions;
39
+ forwardSliceOptions.inclusive = true ;
40
+ // We don't care about the outer-loop's induction variable's uses within the
41
+ // inner-loop, so we filter out these uses.
42
+ //
43
+ // This filter tells `getForwardSlice` (below) to only collect operations
44
+ // which produce results defined above (i.e. outside) the inner-loop's body.
45
+ //
46
+ // Since `outerLoop.getInductionVar()` is a block argument (to the
47
+ // outer-loop's body), the filter effectively collects uses of
48
+ // `outerLoop.getInductionVar()` inside the outer-loop but outside the
49
+ // inner-loop.
50
+ forwardSliceOptions.filter = [&](mlir::Operation *op) {
51
+ return mlir::areValuesDefinedAbove (op->getResults (), innerLoop.getRegion ());
52
+ };
53
+
54
+ llvm::SetVector<mlir::Operation *> indVarSlice;
55
+ mlir::getForwardSlice (outerLoop.getInductionVar (), &indVarSlice,
56
+ forwardSliceOptions);
57
+ llvm::DenseSet<mlir::Operation *> indVarSet (indVarSlice.begin (),
58
+ indVarSlice.end ());
59
+
60
+ llvm::DenseSet<mlir::Operation *> outerLoopBodySet;
61
+ // The following walk collects ops inside `outerLoop` that are **not**:
62
+ // * the outer-loop itself,
63
+ // * or the inner-loop,
64
+ // * or the `fir.result` op (the outer-loop's terminator).
65
+ outerLoop.walk <mlir::WalkOrder::PreOrder>([&](mlir::Operation *op) {
66
+ if (op == outerLoop)
67
+ return mlir::WalkResult::advance ();
68
+
69
+ if (op == innerLoop)
70
+ return mlir::WalkResult::skip ();
71
+
72
+ if (mlir::isa<fir::ResultOp>(op))
73
+ return mlir::WalkResult::advance ();
74
+
75
+ outerLoopBodySet.insert (op);
76
+ return mlir::WalkResult::advance ();
77
+ });
78
+
79
+ // If `outerLoopBodySet` ends up having the same ops as `indVarSet`, then
80
+ // `outerLoop` only contains ops that setup its induction variable +
81
+ // `innerLoop` + the `fir.result` terminator. In other words, `innerLoop` is
82
+ // perfectly nested inside `outerLoop`.
83
+ bool result = (outerLoopBodySet == indVarSet);
84
+ mlir::Location loc = outerLoop.getLoc ();
85
+ LLVM_DEBUG (DBGS () << " Loop pair starting at location " << loc << " is"
86
+ << (result ? " " : " not" ) << " perfectly nested\n " );
87
+
88
+ return result;
89
+ }
90
+
91
+ // / Starting with `outerLoop` collect a perfectly nested loop nest, if any. This
92
+ // / function collects as much as possible loops in the nest; it case it fails to
93
+ // / recognize a certain nested loop as part of the nest it just returns the
94
+ // / parent loops it discovered before.
95
+ mlir::LogicalResult collectLoopNest (fir::DoLoopOp currentLoop,
96
+ LoopNest &loopNest) {
97
+ assert (currentLoop.getUnordered ());
98
+
99
+ while (true ) {
100
+ loopNest.insert (currentLoop);
101
+ auto directlyNestedLoops = currentLoop.getRegion ().getOps <fir::DoLoopOp>();
102
+ llvm::SmallVector<fir::DoLoopOp> unorderedLoops;
103
+
104
+ for (auto nestedLoop : directlyNestedLoops)
105
+ if (nestedLoop.getUnordered ())
106
+ unorderedLoops.push_back (nestedLoop);
107
+
108
+ if (unorderedLoops.empty ())
109
+ break ;
110
+
111
+ if (unorderedLoops.size () > 1 )
112
+ return mlir::failure ();
113
+
114
+ fir::DoLoopOp nestedUnorderedLoop = unorderedLoops.front ();
115
+
116
+ if (!isPerfectlyNested (currentLoop, nestedUnorderedLoop))
117
+ return mlir::failure ();
118
+
119
+ currentLoop = nestedUnorderedLoop;
120
+ }
121
+
122
+ return mlir::success ();
123
+ }
124
+ } // namespace looputils
125
+
24
126
class DoConcurrentConversion : public mlir ::OpConversionPattern<fir::DoLoopOp> {
25
127
public:
26
128
using mlir::OpConversionPattern<fir::DoLoopOp>::OpConversionPattern;
@@ -31,6 +133,14 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
31
133
mlir::LogicalResult
32
134
matchAndRewrite (fir::DoLoopOp doLoop, OpAdaptor adaptor,
33
135
mlir::ConversionPatternRewriter &rewriter) const override {
136
+ looputils::LoopNest loopNest;
137
+ bool hasRemainingNestedLoops =
138
+ failed (looputils::collectLoopNest (doLoop, loopNest));
139
+ if (hasRemainingNestedLoops)
140
+ mlir::emitWarning (doLoop.getLoc (),
141
+ " Some `do concurent` loops are not perfectly-nested. "
142
+ " These will be serialzied." );
143
+
34
144
// TODO This will be filled in with the next PRs that upstream the rest of
35
145
// the ROCm implementation.
36
146
return mlir::success ();
0 commit comments