-
Notifications
You must be signed in to change notification settings - Fork 16.4k
[Flang][OpenMP] Restructure recursive lowering in createBodyOfOp
#77761
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
62f3165
841ae5f
67ea0e5
883afd9
1b5524a
cc411f2
cb3f267
51a9edf
a1b59bf
8ead6c5
9394b96
00ad95a
1207d60
bb6795e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -385,7 +385,8 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { | |
| // construct | ||
| mlir::OpBuilder::InsertPoint unstructuredSectionsIP = | ||
| firOpBuilder.saveInsertionPoint(); | ||
| firOpBuilder.setInsertionPointToStart(&op->getRegion(0).back()); | ||
| mlir::Operation *lastOper = op->getRegion(0).back().getTerminator(); | ||
| firOpBuilder.setInsertionPoint(lastOper); | ||
| lastPrivIP = firOpBuilder.saveInsertionPoint(); | ||
| firOpBuilder.restoreInsertionPoint(unstructuredSectionsIP); | ||
| } | ||
|
|
@@ -2135,15 +2136,6 @@ static mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, | |
| return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); | ||
| } | ||
|
|
||
| static void resetBeforeTerminator(fir::FirOpBuilder &firOpBuilder, | ||
| mlir::Operation *storeOp, | ||
| mlir::Block &block) { | ||
| if (storeOp) | ||
| firOpBuilder.setInsertionPointAfter(storeOp); | ||
| else | ||
| firOpBuilder.setInsertionPointToStart(&block); | ||
| } | ||
|
|
||
| static mlir::Operation * | ||
| createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, | ||
| mlir::Location loc, mlir::Value indexVal, | ||
|
|
@@ -2186,11 +2178,43 @@ static void createBodyOfOp( | |
| const llvm::SmallVector<const Fortran::semantics::Symbol *> &args = {}, | ||
| bool outerCombined = false, DataSharingProcessor *dsp = nullptr) { | ||
| fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); | ||
|
|
||
| auto insertMarker = [](fir::FirOpBuilder &builder) { | ||
| mlir::Value undef = builder.create<fir::UndefOp>(builder.getUnknownLoc(), | ||
| builder.getIndexType()); | ||
| return undef.getDefiningOp(); | ||
| }; | ||
|
|
||
| // Find the block where the OMP terminator should go. In simple cases | ||
| // it is the single block in the operation's region. When the region | ||
| // is more complicated, especially with unstructured control flow, there | ||
| // may be multiple blocks, and some of them may have non-OMP terminators | ||
| // resulting from lowering of the code contained within the operation. | ||
| // By OpenMP rules, there should be a single exit point from the region: | ||
| // here exit means transfering control to the code following the operation. | ||
| // STOP statement is allowed and does not count as exit for the purpose of | ||
| // inserting terminators. | ||
| auto findExitBlock = [&](mlir::Region ®ion) -> mlir::Block * { | ||
| auto isTerminated = [](mlir::Block &block) -> bool { | ||
| if (block.empty()) | ||
| return false; | ||
| return block.back().hasTrait<mlir::OpTrait::IsTerminator>(); | ||
| }; | ||
|
|
||
| mlir::Block *exit = nullptr; | ||
| for (auto &block : region) { | ||
| if (!isTerminated(block)) { | ||
| assert(exit == nullptr && "Multiple exit block in OpenMP region"); | ||
| exit = █ | ||
| } | ||
| } | ||
| return exit; | ||
| }; | ||
|
|
||
| // If an argument for the region is provided then create the block with that | ||
| // argument. Also update the symbol's address with the mlir argument value. | ||
| // e.g. For loops the argument is the induction variable. And all further | ||
| // uses of the induction variable should use this mlir value. | ||
| mlir::Operation *storeOp = nullptr; | ||
| if (args.size()) { | ||
| std::size_t loopVarTypeSize = 0; | ||
| for (const Fortran::semantics::Symbol *arg : args) | ||
|
|
@@ -2201,20 +2225,20 @@ static void createBodyOfOp( | |
| firOpBuilder.createBlock(&op.getRegion(), {}, tiv, locs); | ||
| // The argument is not currently in memory, so make a temporary for the | ||
| // argument, and store it there, then bind that location to the argument. | ||
| mlir::Operation *storeOp = nullptr; | ||
| for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { | ||
| mlir::Value indexVal = | ||
| fir::getBase(op.getRegion().front().getArgument(argIndex)); | ||
| storeOp = | ||
| createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); | ||
| } | ||
| firOpBuilder.setInsertionPointAfter(storeOp); | ||
| } else { | ||
| firOpBuilder.createBlock(&op.getRegion()); | ||
| } | ||
| // Set the insert for the terminator operation to go at the end of the | ||
| // block - this is either empty or the block with the stores above, | ||
| // the end of the block works for both. | ||
| mlir::Block &block = op.getRegion().back(); | ||
| firOpBuilder.setInsertionPointToEnd(&block); | ||
|
|
||
| // Mark the earliest insertion point. | ||
| mlir::Operation *marker = insertMarker(firOpBuilder); | ||
|
|
||
| // If it is an unstructured region and is not the outer region of a combined | ||
| // construct, create empty blocks for all evaluations. | ||
|
|
@@ -2223,37 +2247,64 @@ static void createBodyOfOp( | |
| mlir::omp::YieldOp>( | ||
| firOpBuilder, eval.getNestedEvaluations()); | ||
|
|
||
| // Insert the terminator. | ||
| Fortran::lower::genOpenMPTerminator(firOpBuilder, op.getOperation(), loc); | ||
| // Reset the insert point to before the terminator. | ||
| resetBeforeTerminator(firOpBuilder, storeOp, block); | ||
| // Start with privatization, so that the lowering of the nested | ||
| // code will use the right symbols. | ||
| constexpr bool isLoop = std::is_same_v<Op, mlir::omp::WsLoopOp> || | ||
| std::is_same_v<Op, mlir::omp::SimdLoopOp>; | ||
| bool privatize = clauses && !outerCombined; | ||
|
|
||
| // Handle privatization. Do not privatize if this is the outer operation. | ||
| if (clauses && !outerCombined) { | ||
| constexpr bool isLoop = std::is_same_v<Op, mlir::omp::WsLoopOp> || | ||
| std::is_same_v<Op, mlir::omp::SimdLoopOp>; | ||
| firOpBuilder.setInsertionPoint(marker); | ||
| std::optional<DataSharingProcessor> tempDsp; | ||
| if (privatize) { | ||
| if (!dsp) { | ||
| DataSharingProcessor proc(converter, *clauses, eval); | ||
| proc.processStep1(); | ||
| proc.processStep2(op, isLoop); | ||
| } else { | ||
| if (isLoop && args.size() > 0) | ||
| dsp->setLoopIV(converter.getSymbolAddress(*args[0])); | ||
| dsp->processStep2(op, isLoop); | ||
| tempDsp.emplace(converter, *clauses, eval); | ||
| tempDsp->processStep1(); | ||
| } | ||
|
|
||
| if (storeOp) | ||
| firOpBuilder.setInsertionPointAfter(storeOp); | ||
| } | ||
|
|
||
| if constexpr (std::is_same_v<Op, mlir::omp::ParallelOp>) { | ||
| threadPrivatizeVars(converter, eval); | ||
| if (clauses) | ||
| if (clauses) { | ||
| firOpBuilder.setInsertionPoint(marker); | ||
| ClauseProcessor(converter, *clauses).processCopyin(); | ||
| } | ||
| } | ||
|
|
||
| if (genNested) | ||
| if (genNested) { | ||
| // genFIR(Evaluation&) tries to patch up unterminated blocks, causing | ||
| // a lot of trouble if the terminator generation is delayed past this | ||
| // point. Insert a temporary terminator here, then delete it. | ||
| firOpBuilder.setInsertionPointToEnd(&op.getRegion().back()); | ||
| auto *temp = Fortran::lower::genOpenMPTerminator(firOpBuilder, | ||
| op.getOperation(), loc); | ||
| firOpBuilder.setInsertionPointAfter(marker); | ||
| genNestedEvaluations(converter, eval); | ||
| temp->erase(); | ||
| } | ||
|
|
||
| if (auto *exitBlock = findExitBlock(op.getRegion())) { | ||
| firOpBuilder.setInsertionPointToEnd(exitBlock); | ||
| auto *term = Fortran::lower::genOpenMPTerminator(firOpBuilder, | ||
| op.getOperation(), loc); | ||
| // Only insert lastprivate code when there actually is an exit block. | ||
| // Such a block may not exist if the nested code produced an infinite | ||
| // loop (this may not make sense in production code, but a user could | ||
| // write that and we should handle it). | ||
|
Comment on lines
+2300
to
+2303
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it guaranteed that it is only when there is an infinite loop that no exit block exists?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We rely on the Based on that, the logical thing to do would be to insert a terminator into all non-terminated blocks. This would work even if there is always at most one such block. The assertion that there can only be one comes mostly from two things:
If we can't be sure that the assertion is correct, we should be able to remove it, and process all exiting blocks.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What I'm going to do is to create a unique exiting block: create a new block, and for each yet-unterminated block, will insert a branch to the new one. Then insert the OMP terminator into it, and process the lastprivate in there. This will get rid of the assertion.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. Makes sense. |
||
| firOpBuilder.setInsertionPoint(term); | ||
| if (privatize) { | ||
| if (!dsp) { | ||
| assert(tempDsp.has_value()); | ||
| tempDsp->processStep2(op, isLoop); | ||
| } else { | ||
| if (isLoop && args.size() > 0) | ||
| dsp->setLoopIV(converter.getSymbolAddress(*args[0])); | ||
| dsp->processStep2(op, isLoop); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| firOpBuilder.setInsertionPointAfter(marker); | ||
| marker->erase(); | ||
| } | ||
|
|
||
| static void genBodyOfTargetDataOp( | ||
|
|
@@ -3685,14 +3736,14 @@ genOMP(Fortran::lower::AbstractConverter &converter, | |
| // Public functions | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| void Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder, | ||
| mlir::Operation *op, | ||
| mlir::Location loc) { | ||
| mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder, | ||
| mlir::Operation *op, | ||
| mlir::Location loc) { | ||
| if (mlir::isa<mlir::omp::WsLoopOp, mlir::omp::ReductionDeclareOp, | ||
| mlir::omp::AtomicUpdateOp, mlir::omp::SimdLoopOp>(op)) | ||
| builder.create<mlir::omp::YieldOp>(loc); | ||
| return builder.create<mlir::omp::YieldOp>(loc); | ||
| else | ||
| builder.create<mlir::omp::TerminatorOp>(loc); | ||
| return builder.create<mlir::omp::TerminatorOp>(loc); | ||
| } | ||
|
|
||
| void Fortran::lower::genOpenMPConstruct( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| ! RUN: bbc -fopenmp -o - %s | FileCheck %s | ||
|
|
||
| ! Check that this test can be lowered successfully. | ||
| ! See https://github.com/llvm/llvm-project/issues/74348 | ||
|
|
||
| ! CHECK-LABEL: func.func @_QPsb | ||
| ! CHECK: omp.parallel | ||
| ! CHECK: cf.cond_br %{{[0-9]+}}, ^bb1, ^bb2 | ||
| ! CHECK-NEXT: ^bb1: // pred: ^bb0 | ||
| ! CHECK: cf.br ^bb2 | ||
| ! CHECK-NEXT: ^bb2: // 3 preds: ^bb0, ^bb1, ^bb2 | ||
| ! CHECK-NEXT: cf.br ^bb2 | ||
| ! CHECK-NEXT: } | ||
|
|
||
| subroutine sb(ninter, numnod) | ||
| integer :: ninter, numnod | ||
| integer, dimension(:), allocatable :: indx_nm | ||
|
|
||
| !$omp parallel | ||
| if (ninter>0) then | ||
| allocate(indx_nm(numnod)) | ||
| endif | ||
| 220 continue | ||
| goto 220 | ||
| !$omp end parallel | ||
| end subroutine |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| ! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s | ||
|
|
||
| subroutine sub(imax, jmax, x, y) | ||
| integer, intent(in) :: imax, jmax | ||
| real, intent(in), dimension(1:imax, 1:jmax) :: x, y | ||
|
|
||
| integer :: i, j, ii | ||
|
|
||
| ! collapse(2) is needed to reproduce the issue | ||
| !$omp parallel do collapse(2) | ||
| do j = 1, jmax | ||
| do i = 1, imax | ||
| do ii = 1, imax ! note that this loop is not collapsed | ||
| if (x(i,j) < y(ii,j)) then | ||
| ! exit needed to force unstructured control flow | ||
| exit | ||
| endif | ||
| enddo | ||
| enddo | ||
| enddo | ||
| end subroutine sub | ||
|
|
||
| ! this is testing that we don't crash generating code for this: in particular | ||
| ! that all blocks are terminated | ||
|
|
||
| ! CHECK-LABEL: func.func @_QPsub( | ||
| ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<i32> {fir.bindc_name = "imax"}, | ||
| ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "jmax"}, | ||
| ! CHECK-SAME: %[[VAL_2:.*]]: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "x"}, | ||
| ! CHECK-SAME: %[[VAL_3:.*]]: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "y"}) { | ||
| ! [...] | ||
| ! CHECK: omp.wsloop for (%[[VAL_53:.*]], %[[VAL_54:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) { | ||
| ! [...] | ||
| ! CHECK: cf.br ^bb1 | ||
| ! CHECK: ^bb1: | ||
| ! CHECK: cf.br ^bb2 | ||
| ! CHECK: ^bb2: | ||
| ! [...] | ||
| ! CHECK: cf.br ^bb3 | ||
| ! CHECK: ^bb3: | ||
| ! [...] | ||
| ! CHECK: %[[VAL_63:.*]] = arith.cmpi sgt, %{{.*}}, %{{.*}} : i32 | ||
| ! CHECK: cf.cond_br %[[VAL_63]], ^bb4, ^bb7 | ||
| ! CHECK: ^bb4: | ||
| ! [...] | ||
| ! CHECK: %[[VAL_76:.*]] = arith.cmpf olt, %{{.*}}, %{{.*}} fastmath<contract> : f32 | ||
| ! CHECK: cf.cond_br %[[VAL_76]], ^bb5, ^bb6 | ||
| ! CHECK: ^bb5: | ||
| ! CHECK: cf.br ^bb7 | ||
| ! CHECK: ^bb6: | ||
| ! [...] | ||
| ! CHECK: cf.br ^bb3 | ||
| ! CHECK: ^bb7: | ||
| ! CHECK: omp.yield | ||
| ! CHECK: } | ||
| ! CHECK: omp.terminator | ||
| ! CHECK: } | ||
| ! CHECK: cf.br ^bb1 | ||
| ! CHECK: ^bb1: | ||
| ! CHECK: return | ||
| ! CHECK: } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where is the STOP statement accounted for here? The code here looks like find the block without a terminator and assert that there is only ONE.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
STOP is lowered as a function call, so we don't really see it as a control-flow construct in FIR.
Otherwise, you're right---it looks for blocks that haven't been terminated by any nested construct (i.e. after removing the temporary terminator at the end of the region), and asserts if there are more than 1.