@@ -42,13 +42,32 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
42
42
mlir::omp::ParallelOp parallelOp =
43
43
rewriter.create <mlir::omp::ParallelOp>(doLoop.getLoc ());
44
44
45
- rewriter.createBlock (&parallelOp.getRegion ());
46
- mlir::Block &block = parallelOp.getRegion ().back ();
45
+ mlir::Block *block = rewriter.createBlock (&parallelOp.getRegion ());
47
46
48
- rewriter.setInsertionPointToEnd (& block);
47
+ rewriter.setInsertionPointToEnd (block);
49
48
rewriter.create <mlir::omp::TerminatorOp>(doLoop.getLoc ());
50
49
51
- rewriter.setInsertionPointToStart (&block);
50
+ rewriter.setInsertionPointToStart (block);
51
+
52
+ // ==== TODO (1) Start ====
53
+ //
54
+ // The goal of the few lines below is to collect and clone
55
+ // the list of operations that define the loop's lower and upper bounds as
56
+ // well as the step. Should we, instead of doing this here, split it into 2
57
+ // stages?
58
+ //
59
+ // 1. **Stage 1**: add an analysis that extracts all the relevant
60
+ // operations defining the lower-bound, upper-bound, and
61
+ // step.
62
+ // 2. **Stage 2**: clone the collected operations in the parallel region.
63
+ //
64
+ // So far, the pass has been tested with very simple loops (where the bounds
65
+ // and step are constants) so the goal of **Stage 1** is to have a
66
+ // well-defined component that has the sole responsibility of collecting all
67
+ // the relevant ops relevant to the loop header. This was we can test this
68
+ // in isolation for more complex loops and better organize the code. **Stage
69
+ // 2** would then be responsible for the actual cloning of the collected
70
+ // loop header preparation/allocation operations.
52
71
53
72
// Clone the LB, UB, step defining ops inside the parallel region.
54
73
llvm::SmallVector<mlir::Value> lowerBound, upperBound, step;
@@ -58,16 +77,34 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
58
77
rewriter.clone (*doLoop.getUpperBound ().getDefiningOp ())->getResult (0 ));
59
78
step.push_back (
60
79
rewriter.clone (*doLoop.getStep ().getDefiningOp ())->getResult (0 ));
80
+ // ==== TODO (1) End ====
61
81
62
82
auto wsLoopOp = rewriter.create <mlir::omp::WsLoopOp>(
63
83
doLoop.getLoc (), lowerBound, upperBound, step);
64
84
wsLoopOp.setInclusive (true );
65
85
66
86
auto outlineableOp =
67
87
mlir::dyn_cast<mlir::omp::OutlineableOpenMPOpInterface>(*parallelOp);
68
- assert (outlineableOp);
69
88
rewriter.setInsertionPointToStart (outlineableOp.getAllocaBlock ());
70
89
90
+ // ==== TODO (2) Start ====
91
+ //
92
+ // The goal of the following simple work-list algorithm and
93
+ // the following `for` loop is to collect all the operations related to the
94
+ // allocation of the induction variable for the `do concurrent` loop. The
95
+ // operations collected by this algorithm are very similar to what is
96
+ // usually emitted for privatized variables, e.g. for omp.parallel loops.
97
+ // Therefore, I think we can:
98
+ //
99
+ // 1. **Stage 1**: Add an analysis that collects all these operations. The
100
+ // goal is similar to **Stage 1** of TODO (1): isolate the
101
+ // algorithm in an individually-testable component so that
102
+ // we properly implement and test it for more complicated
103
+ // `do concurrent` loops.
104
+ // 2. **Stage 2**: Using the collected operations, create and populate an
105
+ // `omp.private {type=private}` op to serve as the
106
+ // delayed privatizer for the new work-sharing loop.
107
+
71
108
// For the induction variable, we need to privative its allocation and
72
109
// binding inside the parallel region.
73
110
llvm::SmallSetVector<mlir::Operation *, 2 > workList;
@@ -101,6 +138,12 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
101
138
}
102
139
declareAndAllocasToClone.insert (storeTarget);
103
140
}
141
+ // ==== TODO (2) End ====
142
+ //
143
+ // TODO (1 & 2): By isolating the analyses proposed in both TODOs, I think we can
144
+ // more easily generalize the pass to work for targets other than OpenMP,
145
+ // e.g. OpenACC, which I think could reuse the results of the analyses and only
146
+ // change the code-gen/rewriting.
104
147
105
148
mlir::IRMapping mapper;
106
149
0 commit comments