-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang][OpenMP] Extend do concurrent
mapping to multi-range loops
#127634
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
ergawy
merged 3 commits into
main
from
users/ergawy/upstream_do_concurrent_4_multi_range_loops
Apr 2, 2025
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -28,64 +28,80 @@ namespace looputils { | |||||
/// Stores info needed about the induction/iteration variable for each `do | ||||||
/// concurrent` in a loop nest. | ||||||
struct InductionVariableInfo { | ||||||
InductionVariableInfo(fir::DoLoopOp doLoop) { populateInfo(doLoop); } | ||||||
|
||||||
/// The operation allocating memory for iteration variable. | ||||||
mlir::Operation *iterVarMemDef; | ||||||
}; | ||||||
/// the operation(s) updating the iteration variable with the current | ||||||
/// iteration number. | ||||||
llvm::SmallVector<mlir::Operation *, 2> indVarUpdateOps; | ||||||
|
||||||
using LoopNestToIndVarMap = | ||||||
llvm::MapVector<fir::DoLoopOp, InductionVariableInfo>; | ||||||
|
||||||
/// For the \p doLoop parameter, find the operation that declares its iteration | ||||||
/// variable or allocates memory for it. | ||||||
/// | ||||||
/// For example, give the following loop: | ||||||
/// ``` | ||||||
/// ... | ||||||
/// %i:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : ... | ||||||
/// ... | ||||||
/// fir.do_loop %ind_var = %lb to %ub step %s unordered { | ||||||
/// %ind_var_conv = fir.convert %ind_var : (index) -> i32 | ||||||
/// fir.store %ind_var_conv to %i#1 : !fir.ref<i32> | ||||||
/// ... | ||||||
/// } | ||||||
/// ``` | ||||||
/// | ||||||
/// This function returns the `hlfir.declare` op for `%i`. | ||||||
/// | ||||||
/// Note: The current implementation is dependent on how flang emits loop | ||||||
/// bodies; which is sufficient for the current simple test/use cases. If this | ||||||
/// proves to be insufficient, this should be made more generic. | ||||||
mlir::Operation *findLoopIterationVarMemDecl(fir::DoLoopOp doLoop) { | ||||||
mlir::Value result = nullptr; | ||||||
|
||||||
// Checks if a StoreOp is updating the memref of the loop's iteration | ||||||
// variable. | ||||||
auto isStoringIV = [&](fir::StoreOp storeOp) { | ||||||
// Direct store into the IV memref. | ||||||
if (storeOp.getValue() == doLoop.getInductionVar()) | ||||||
return true; | ||||||
|
||||||
// Indirect store into the IV memref. | ||||||
if (auto convertOp = mlir::dyn_cast<fir::ConvertOp>( | ||||||
storeOp.getValue().getDefiningOp())) { | ||||||
if (convertOp.getOperand() == doLoop.getInductionVar()) | ||||||
private: | ||||||
/// For the \p doLoop parameter, find the following: | ||||||
/// | ||||||
/// 1. The operation that declares its iteration variable or allocates memory | ||||||
/// for it. For example, give the following loop: | ||||||
/// ``` | ||||||
/// ... | ||||||
/// %i:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : ... | ||||||
/// ... | ||||||
/// fir.do_loop %ind_var = %lb to %ub step %s unordered { | ||||||
/// %ind_var_conv = fir.convert %ind_var : (index) -> i32 | ||||||
/// fir.store %ind_var_conv to %i#1 : !fir.ref<i32> | ||||||
/// ... | ||||||
/// } | ||||||
/// ``` | ||||||
/// | ||||||
/// This function sets the `iterVarMemDef` member to the `hlfir.declare` op | ||||||
/// for `%i`. | ||||||
/// | ||||||
/// 2. The operation(s) that update the loop's iteration variable from its | ||||||
/// induction variable. For the above example, the `indVarUpdateOps` is | ||||||
/// populated with the first 2 ops in the loop's body. | ||||||
/// | ||||||
/// Note: The current implementation is dependent on how flang emits loop | ||||||
/// bodies; which is sufficient for the current simple test/use cases. If this | ||||||
/// proves to be insufficient, this should be made more generic. | ||||||
void populateInfo(fir::DoLoopOp doLoop) { | ||||||
mlir::Value result = nullptr; | ||||||
|
||||||
// Checks if a StoreOp is updating the memref of the loop's iteration | ||||||
// variable. | ||||||
auto isStoringIV = [&](fir::StoreOp storeOp) { | ||||||
// Direct store into the IV memref. | ||||||
if (storeOp.getValue() == doLoop.getInductionVar()) { | ||||||
indVarUpdateOps.push_back(storeOp); | ||||||
return true; | ||||||
} | ||||||
|
||||||
return false; | ||||||
}; | ||||||
} | ||||||
|
||||||
for (mlir::Operation &op : doLoop) { | ||||||
if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(op)) | ||||||
if (isStoringIV(storeOp)) { | ||||||
result = storeOp.getMemref(); | ||||||
break; | ||||||
// Indirect store into the IV memref. | ||||||
if (auto convertOp = mlir::dyn_cast<fir::ConvertOp>( | ||||||
storeOp.getValue().getDefiningOp())) { | ||||||
if (convertOp.getOperand() == doLoop.getInductionVar()) { | ||||||
indVarUpdateOps.push_back(convertOp); | ||||||
indVarUpdateOps.push_back(storeOp); | ||||||
return true; | ||||||
} | ||||||
} | ||||||
|
||||||
return false; | ||||||
}; | ||||||
|
||||||
for (mlir::Operation &op : doLoop) { | ||||||
if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(op)) | ||||||
if (isStoringIV(storeOp)) { | ||||||
result = storeOp.getMemref(); | ||||||
break; | ||||||
} | ||||||
} | ||||||
|
||||||
assert(result != nullptr && result.getDefiningOp() != nullptr); | ||||||
iterVarMemDef = result.getDefiningOp(); | ||||||
} | ||||||
}; | ||||||
|
||||||
assert(result != nullptr && result.getDefiningOp() != nullptr); | ||||||
return result.getDefiningOp(); | ||||||
} | ||||||
using LoopNestToIndVarMap = | ||||||
llvm::MapVector<fir::DoLoopOp, InductionVariableInfo>; | ||||||
|
||||||
/// Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff | ||||||
/// there are no operations in \p outerloop's body other than: | ||||||
|
@@ -181,10 +197,7 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop, | |||||
assert(currentLoop.getUnordered()); | ||||||
|
||||||
while (true) { | ||||||
loopNest.insert( | ||||||
{currentLoop, | ||||||
InductionVariableInfo{findLoopIterationVarMemDecl(currentLoop)}}); | ||||||
|
||||||
loopNest.insert({currentLoop, InductionVariableInfo(currentLoop)}); | ||||||
llvm::SmallVector<fir::DoLoopOp> unorderedLoops; | ||||||
|
||||||
for (auto nestedLoop : currentLoop.getRegion().getOps<fir::DoLoopOp>()) | ||||||
|
@@ -210,6 +223,96 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop, | |||||
|
||||||
return mlir::success(); | ||||||
} | ||||||
|
||||||
/// Prepares the `fir.do_loop` nest to be easily mapped to OpenMP. In | ||||||
/// particular, this function would take this input IR: | ||||||
/// ``` | ||||||
/// fir.do_loop %i_iv = %i_lb to %i_ub step %i_step unordered { | ||||||
/// fir.store %i_iv to %i#1 : !fir.ref<i32> | ||||||
/// %j_lb = arith.constant 1 : i32 | ||||||
/// %j_ub = arith.constant 10 : i32 | ||||||
/// %j_step = arith.constant 1 : index | ||||||
/// | ||||||
/// fir.do_loop %j_iv = %j_lb to %j_ub step %j_step unordered { | ||||||
/// fir.store %j_iv to %j#1 : !fir.ref<i32> | ||||||
/// ... | ||||||
/// } | ||||||
/// } | ||||||
/// ``` | ||||||
/// | ||||||
/// into the following form (using generic op form since the result is | ||||||
/// technically an invalid `fir.do_loop` op: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
/// | ||||||
/// ``` | ||||||
/// "fir.do_loop"(%i_lb, %i_ub, %i_step) <{unordered}> ({ | ||||||
/// ^bb0(%i_iv: index): | ||||||
/// %j_lb = "arith.constant"() <{value = 1 : i32}> : () -> i32 | ||||||
/// %j_ub = "arith.constant"() <{value = 10 : i32}> : () -> i32 | ||||||
/// %j_step = "arith.constant"() <{value = 1 : index}> : () -> index | ||||||
/// | ||||||
/// "fir.do_loop"(%j_lb, %j_ub, %j_step) <{unordered}> ({ | ||||||
/// ^bb0(%new_i_iv: index, %new_j_iv: index): | ||||||
/// "fir.store"(%new_i_iv, %i#1) : (i32, !fir.ref<i32>) -> () | ||||||
/// "fir.store"(%new_j_iv, %j#1) : (i32, !fir.ref<i32>) -> () | ||||||
/// ... | ||||||
/// }) | ||||||
/// ``` | ||||||
/// | ||||||
/// What happened to the loop nest is the following: | ||||||
/// | ||||||
/// * the innermost loop's entry block was updated from having one operand to | ||||||
/// having `n` operands where `n` is the number of loops in the nest, | ||||||
/// | ||||||
/// * the outer loop(s)' ops that update the IVs were sank inside the innermost | ||||||
/// loop (see the `"fir.store"(%new_i_iv, %i#1)` op above), | ||||||
/// | ||||||
/// * the innermost loop's entry block's arguments were mapped in order from the | ||||||
/// outermost to the innermost IV. | ||||||
/// | ||||||
/// With this IR change, we can directly inline the innermost loop's region into | ||||||
/// the newly generated `omp.loop_nest` op. | ||||||
/// | ||||||
/// Note that this function has a pre-condition that \p loopNest consists of | ||||||
/// perfectly nested loops; i.e. there are no in-between ops between 2 nested | ||||||
/// loops except for the ops to setup the inner loop's LB, UB, and step. These | ||||||
/// ops are handled/cloned by `genLoopNestClauseOps(..)`. | ||||||
void sinkLoopIVArgs(mlir::ConversionPatternRewriter &rewriter, | ||||||
looputils::LoopNestToIndVarMap &loopNest) { | ||||||
if (loopNest.size() <= 1) | ||||||
return; | ||||||
|
||||||
fir::DoLoopOp innermostLoop = loopNest.back().first; | ||||||
mlir::Operation &innermostFirstOp = innermostLoop.getRegion().front().front(); | ||||||
|
||||||
llvm::SmallVector<mlir::Type> argTypes; | ||||||
llvm::SmallVector<mlir::Location> argLocs; | ||||||
|
||||||
for (auto &[doLoop, indVarInfo] : llvm::drop_end(loopNest)) { | ||||||
// Sink the IV update ops to the innermost loop. We need to do for all loops | ||||||
// except for the innermost one, hence the `drop_end` usage above. | ||||||
for (mlir::Operation *op : indVarInfo.indVarUpdateOps) | ||||||
op->moveBefore(&innermostFirstOp); | ||||||
|
||||||
argTypes.push_back(doLoop.getInductionVar().getType()); | ||||||
argLocs.push_back(doLoop.getInductionVar().getLoc()); | ||||||
} | ||||||
|
||||||
mlir::Region &innermmostRegion = innermostLoop.getRegion(); | ||||||
// Extend the innermost entry block with arguments to represent the outer IVs. | ||||||
innermmostRegion.addArguments(argTypes, argLocs); | ||||||
|
||||||
unsigned idx = 1; | ||||||
// In reverse, remap the IVs of the loop nest from the old values to the new | ||||||
// ones. We do that in reverse since the first argument before this loop is | ||||||
// the old IV for the innermost loop. Therefore, we want to replace it first | ||||||
// before the old value (1st argument in the block) is remapped to be the IV | ||||||
// of the outermost loop in the nest. | ||||||
for (auto &[doLoop, _] : llvm::reverse(loopNest)) { | ||||||
doLoop.getInductionVar().replaceAllUsesWith( | ||||||
innermmostRegion.getArgument(innermmostRegion.getNumArguments() - idx)); | ||||||
++idx; | ||||||
} | ||||||
} | ||||||
} // namespace looputils | ||||||
|
||||||
class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> { | ||||||
|
@@ -236,6 +339,7 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> { | |||||
"Some `do concurent` loops are not perfectly-nested. " | ||||||
"These will be serialized."); | ||||||
|
||||||
looputils::sinkLoopIVArgs(rewriter, loopNest); | ||||||
mlir::IRMapping mapper; | ||||||
genParallelOp(doLoop.getLoc(), rewriter, loopNest, mapper); | ||||||
mlir::omp::LoopNestOperands loopNestClauseOps; | ||||||
|
72 changes: 72 additions & 0 deletions
72
flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
! Tests mapping of a `do concurrent` loop with multiple iteration ranges. | ||
|
||
! RUN: split-file %s %t | ||
|
||
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %t/multi_range.f90 -o - \ | ||
! RUN: | FileCheck %s | ||
|
||
!--- multi_range.f90 | ||
program main | ||
integer, parameter :: n = 20 | ||
integer, parameter :: m = 40 | ||
integer, parameter :: l = 60 | ||
integer :: a(n, m, l) | ||
|
||
do concurrent(i=3:n, j=5:m, k=7:l) | ||
a(i,j,k) = i * j + k | ||
end do | ||
end | ||
|
||
! CHECK: func.func @_QQmain | ||
|
||
! CHECK: %[[C3:.*]] = arith.constant 3 : i32 | ||
! CHECK: %[[LB_I:.*]] = fir.convert %[[C3]] : (i32) -> index | ||
! CHECK: %[[C20:.*]] = arith.constant 20 : i32 | ||
! CHECK: %[[UB_I:.*]] = fir.convert %[[C20]] : (i32) -> index | ||
! CHECK: %[[STEP_I:.*]] = arith.constant 1 : index | ||
|
||
! CHECK: %[[C5:.*]] = arith.constant 5 : i32 | ||
! CHECK: %[[LB_J:.*]] = fir.convert %[[C5]] : (i32) -> index | ||
! CHECK: %[[C40:.*]] = arith.constant 40 : i32 | ||
! CHECK: %[[UB_J:.*]] = fir.convert %[[C40]] : (i32) -> index | ||
! CHECK: %[[STEP_J:.*]] = arith.constant 1 : index | ||
|
||
! CHECK: %[[C7:.*]] = arith.constant 7 : i32 | ||
! CHECK: %[[LB_K:.*]] = fir.convert %[[C7]] : (i32) -> index | ||
! CHECK: %[[C60:.*]] = arith.constant 60 : i32 | ||
! CHECK: %[[UB_K:.*]] = fir.convert %[[C60]] : (i32) -> index | ||
! CHECK: %[[STEP_K:.*]] = arith.constant 1 : index | ||
|
||
! CHECK: omp.parallel { | ||
|
||
! CHECK-NEXT: %[[ITER_VAR_I:.*]] = fir.alloca i32 {bindc_name = "i"} | ||
! CHECK-NEXT: %[[BINDING_I:.*]]:2 = hlfir.declare %[[ITER_VAR_I]] {uniq_name = "_QFEi"} | ||
|
||
! CHECK-NEXT: %[[ITER_VAR_J:.*]] = fir.alloca i32 {bindc_name = "j"} | ||
! CHECK-NEXT: %[[BINDING_J:.*]]:2 = hlfir.declare %[[ITER_VAR_J]] {uniq_name = "_QFEj"} | ||
|
||
! CHECK-NEXT: %[[ITER_VAR_K:.*]] = fir.alloca i32 {bindc_name = "k"} | ||
! CHECK-NEXT: %[[BINDING_K:.*]]:2 = hlfir.declare %[[ITER_VAR_K]] {uniq_name = "_QFEk"} | ||
|
||
! CHECK: omp.wsloop { | ||
! CHECK-NEXT: omp.loop_nest | ||
! CHECK-SAME: (%[[ARG0:[^[:space:]]+]], %[[ARG1:[^[:space:]]+]], %[[ARG2:[^[:space:]]+]]) | ||
! CHECK-SAME: : index = (%[[LB_I]], %[[LB_J]], %[[LB_K]]) | ||
! CHECK-SAME: to (%[[UB_I]], %[[UB_J]], %[[UB_K]]) inclusive | ||
! CHECK-SAME: step (%[[STEP_I]], %[[STEP_J]], %[[STEP_K]]) { | ||
|
||
! CHECK-NEXT: %[[IV_IDX_I:.*]] = fir.convert %[[ARG0]] | ||
! CHECK-NEXT: fir.store %[[IV_IDX_I]] to %[[BINDING_I]]#0 | ||
|
||
! CHECK-NEXT: %[[IV_IDX_J:.*]] = fir.convert %[[ARG1]] | ||
! CHECK-NEXT: fir.store %[[IV_IDX_J]] to %[[BINDING_J]]#0 | ||
|
||
! CHECK-NEXT: %[[IV_IDX_K:.*]] = fir.convert %[[ARG2]] | ||
! CHECK-NEXT: fir.store %[[IV_IDX_K]] to %[[BINDING_K]]#0 | ||
|
||
! CHECK: omp.yield | ||
! CHECK-NEXT: } | ||
! CHECK-NEXT: } | ||
|
||
! CHECK-NEXT: omp.terminator | ||
! CHECK-NEXT: } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.