diff --git a/flang/include/flang/Semantics/openmp-directive-sets.h b/flang/include/flang/Semantics/openmp-directive-sets.h
index a4f27b00152e2..a40073ff47914 100644
--- a/flang/include/flang/Semantics/openmp-directive-sets.h
+++ b/flang/include/flang/Semantics/openmp-directive-sets.h
@@ -24,68 +24,80 @@ namespace llvm::omp {
 //  - allSet: All standalone or combined uses of the directive.
 
 static const OmpDirectiveSet topParallelSet{
-    Directive::OMPD_parallel,
-    Directive::OMPD_parallel_do,
     Directive::OMPD_parallel_do_simd,
+    Directive::OMPD_parallel_do,
+    Directive::OMPD_parallel_masked_taskloop_simd,
+    Directive::OMPD_parallel_masked_taskloop,
+    Directive::OMPD_parallel_master_taskloop_simd,
+    Directive::OMPD_parallel_master_taskloop,
     Directive::OMPD_parallel_sections,
     Directive::OMPD_parallel_workshare,
+    Directive::OMPD_parallel,
 };
 
 static const OmpDirectiveSet allParallelSet{
-    Directive::OMPD_distribute_parallel_do,
-    Directive::OMPD_distribute_parallel_do_simd,
-    Directive::OMPD_parallel,
-    Directive::OMPD_parallel_do,
-    Directive::OMPD_parallel_do_simd,
-    Directive::OMPD_parallel_sections,
-    Directive::OMPD_parallel_workshare,
-    Directive::OMPD_target_parallel,
-    Directive::OMPD_target_parallel_do,
-    Directive::OMPD_target_parallel_do_simd,
-    Directive::OMPD_target_teams_distribute_parallel_do,
-    Directive::OMPD_target_teams_distribute_parallel_do_simd,
-    Directive::OMPD_teams_distribute_parallel_do,
-    Directive::OMPD_teams_distribute_parallel_do_simd,
+    OmpDirectiveSet{
+        Directive::OMPD_distribute_parallel_do_simd,
+        Directive::OMPD_distribute_parallel_do,
+        Directive::OMPD_target_parallel_do_simd,
+        Directive::OMPD_target_parallel_do,
+        Directive::OMPD_target_parallel,
+        Directive::OMPD_target_teams_distribute_parallel_do_simd,
+        Directive::OMPD_target_teams_distribute_parallel_do,
+        Directive::OMPD_teams_distribute_parallel_do_simd,
+        Directive::OMPD_teams_distribute_parallel_do,
+    } | topParallelSet,
 };
 
 static const OmpDirectiveSet topDoSet{
-    Directive::OMPD_do,
     Directive::OMPD_do_simd,
+    Directive::OMPD_do,
 };
 
 static const OmpDirectiveSet allDoSet{
-    Directive::OMPD_distribute_parallel_do,
-    Directive::OMPD_distribute_parallel_do_simd,
-    Directive::OMPD_parallel_do,
-    Directive::OMPD_parallel_do_simd,
-    Directive::OMPD_do,
-    Directive::OMPD_do_simd,
-    Directive::OMPD_target_parallel_do,
-    Directive::OMPD_target_parallel_do_simd,
-    Directive::OMPD_target_teams_distribute_parallel_do,
-    Directive::OMPD_target_teams_distribute_parallel_do_simd,
-    Directive::OMPD_teams_distribute_parallel_do,
-    Directive::OMPD_teams_distribute_parallel_do_simd,
+    OmpDirectiveSet{
+        Directive::OMPD_distribute_parallel_do_simd,
+        Directive::OMPD_distribute_parallel_do,
+        Directive::OMPD_parallel_do_simd,
+        Directive::OMPD_parallel_do,
+        Directive::OMPD_target_parallel_do_simd,
+        Directive::OMPD_target_parallel_do,
+        Directive::OMPD_target_teams_distribute_parallel_do_simd,
+        Directive::OMPD_target_teams_distribute_parallel_do,
+        Directive::OMPD_teams_distribute_parallel_do_simd,
+        Directive::OMPD_teams_distribute_parallel_do,
+    } | topDoSet,
 };
 
 static const OmpDirectiveSet topTaskloopSet{
-    Directive::OMPD_taskloop,
     Directive::OMPD_taskloop_simd,
+    Directive::OMPD_taskloop,
 };
 
-static const OmpDirectiveSet allTaskloopSet{topTaskloopSet};
+static const OmpDirectiveSet allTaskloopSet{
+    OmpDirectiveSet{
+        Directive::OMPD_masked_taskloop_simd,
+        Directive::OMPD_masked_taskloop,
+        Directive::OMPD_master_taskloop_simd,
+        Directive::OMPD_master_taskloop,
+        Directive::OMPD_parallel_masked_taskloop_simd,
+        Directive::OMPD_parallel_masked_taskloop,
+        Directive::OMPD_parallel_master_taskloop_simd,
+        Directive::OMPD_parallel_master_taskloop,
+    } | topTaskloopSet,
+};
 
 static const OmpDirectiveSet topTargetSet{
-    Directive::OMPD_target,
-    Directive::OMPD_target_parallel,
-    Directive::OMPD_target_parallel_do,
     Directive::OMPD_target_parallel_do_simd,
+    Directive::OMPD_target_parallel_do,
+    Directive::OMPD_target_parallel,
     Directive::OMPD_target_simd,
-    Directive::OMPD_target_teams,
-    Directive::OMPD_target_teams_distribute,
-    Directive::OMPD_target_teams_distribute_parallel_do,
     Directive::OMPD_target_teams_distribute_parallel_do_simd,
+    Directive::OMPD_target_teams_distribute_parallel_do,
     Directive::OMPD_target_teams_distribute_simd,
+    Directive::OMPD_target_teams_distribute,
+    Directive::OMPD_target_teams,
+    Directive::OMPD_target,
 };
 
 static const OmpDirectiveSet allTargetSet{topTargetSet};
 
@@ -95,61 +107,61 @@ static const OmpDirectiveSet topSimdSet{
 };
 
 static const OmpDirectiveSet allSimdSet{
-    Directive::OMPD_distribute_parallel_do_simd,
-    Directive::OMPD_distribute_simd,
-    Directive::OMPD_do_simd,
-    Directive::OMPD_parallel_do_simd,
-    Directive::OMPD_simd,
-    Directive::OMPD_target_parallel_do_simd,
-    Directive::OMPD_target_simd,
-    Directive::OMPD_target_teams_distribute_parallel_do_simd,
-    Directive::OMPD_target_teams_distribute_simd,
-    Directive::OMPD_taskloop_simd,
-    Directive::OMPD_teams_distribute_parallel_do_simd,
-    Directive::OMPD_teams_distribute_simd,
+    OmpDirectiveSet{
+        Directive::OMPD_distribute_parallel_do_simd,
+        Directive::OMPD_distribute_simd,
+        Directive::OMPD_do_simd,
+        Directive::OMPD_masked_taskloop_simd,
+        Directive::OMPD_master_taskloop_simd,
+        Directive::OMPD_parallel_do_simd,
+        Directive::OMPD_parallel_masked_taskloop_simd,
+        Directive::OMPD_parallel_master_taskloop_simd,
+        Directive::OMPD_target_parallel_do_simd,
+        Directive::OMPD_target_simd,
+        Directive::OMPD_target_teams_distribute_parallel_do_simd,
+        Directive::OMPD_target_teams_distribute_simd,
+        Directive::OMPD_taskloop_simd,
+        Directive::OMPD_teams_distribute_parallel_do_simd,
+        Directive::OMPD_teams_distribute_simd,
+    } | topSimdSet,
 };
 
 static const OmpDirectiveSet topTeamsSet{
-    Directive::OMPD_teams,
-    Directive::OMPD_teams_distribute,
-    Directive::OMPD_teams_distribute_parallel_do,
     Directive::OMPD_teams_distribute_parallel_do_simd,
+    Directive::OMPD_teams_distribute_parallel_do,
     Directive::OMPD_teams_distribute_simd,
+    Directive::OMPD_teams_distribute,
+    Directive::OMPD_teams,
 };
 
 static const OmpDirectiveSet allTeamsSet{
-    llvm::omp::OMPD_target_teams,
-    llvm::omp::OMPD_target_teams_distribute,
-    llvm::omp::OMPD_target_teams_distribute_parallel_do,
-    llvm::omp::OMPD_target_teams_distribute_parallel_do_simd,
-    llvm::omp::OMPD_target_teams_distribute_simd,
-    llvm::omp::OMPD_teams,
-    llvm::omp::OMPD_teams_distribute,
-    llvm::omp::OMPD_teams_distribute_parallel_do,
-    llvm::omp::OMPD_teams_distribute_parallel_do_simd,
-    llvm::omp::OMPD_teams_distribute_simd,
+    OmpDirectiveSet{
+        llvm::omp::OMPD_target_teams_distribute_parallel_do_simd,
+        llvm::omp::OMPD_target_teams_distribute_parallel_do,
+        llvm::omp::OMPD_target_teams_distribute_simd,
+        llvm::omp::OMPD_target_teams_distribute,
+        llvm::omp::OMPD_target_teams,
+    } | topTeamsSet,
 };
 
 static const OmpDirectiveSet topDistributeSet{
-    Directive::OMPD_distribute,
-    Directive::OMPD_distribute_parallel_do,
     Directive::OMPD_distribute_parallel_do_simd,
+    Directive::OMPD_distribute_parallel_do,
     Directive::OMPD_distribute_simd,
+    Directive::OMPD_distribute,
 };
 
 static const OmpDirectiveSet allDistributeSet{
-    llvm::omp::OMPD_distribute,
-    llvm::omp::OMPD_distribute_parallel_do,
-    llvm::omp::OMPD_distribute_parallel_do_simd,
-    llvm::omp::OMPD_distribute_simd,
-    llvm::omp::OMPD_target_teams_distribute,
-    llvm::omp::OMPD_target_teams_distribute_parallel_do,
-    llvm::omp::OMPD_target_teams_distribute_parallel_do_simd,
-    llvm::omp::OMPD_target_teams_distribute_simd,
-    llvm::omp::OMPD_teams_distribute,
-    llvm::omp::OMPD_teams_distribute_parallel_do,
-    llvm::omp::OMPD_teams_distribute_parallel_do_simd,
-    llvm::omp::OMPD_teams_distribute_simd,
+    OmpDirectiveSet{
+        llvm::omp::OMPD_target_teams_distribute_parallel_do_simd,
+        llvm::omp::OMPD_target_teams_distribute_parallel_do,
+        llvm::omp::OMPD_target_teams_distribute_simd,
+        llvm::omp::OMPD_target_teams_distribute,
+        llvm::omp::OMPD_teams_distribute_parallel_do_simd,
+        llvm::omp::OMPD_teams_distribute_parallel_do,
+        llvm::omp::OMPD_teams_distribute_simd,
+        llvm::omp::OMPD_teams_distribute,
+    } | topDistributeSet,
 };
 
 //===----------------------------------------------------------------------===//
@@ -188,8 +200,16 @@ static const OmpDirectiveSet loopConstructSet{
     Directive::OMPD_distribute,
     Directive::OMPD_do_simd,
     Directive::OMPD_do,
+    Directive::OMPD_masked_taskloop,
+    Directive::OMPD_masked_taskloop_simd,
+    Directive::OMPD_master_taskloop,
+    Directive::OMPD_master_taskloop_simd,
     Directive::OMPD_parallel_do_simd,
     Directive::OMPD_parallel_do,
+    Directive::OMPD_parallel_masked_taskloop,
+    Directive::OMPD_parallel_masked_taskloop_simd,
+    Directive::OMPD_parallel_master_taskloop,
+    Directive::OMPD_parallel_master_taskloop_simd,
     Directive::OMPD_simd,
     Directive::OMPD_target_parallel_do_simd,
     Directive::OMPD_target_parallel_do,
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 833989cdc7023..4cb2aa74e1791 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -230,17 +230,30 @@ addUseDeviceClause(Fortran::lower::AbstractConverter &converter,
   }
 }
 
+static void convertLoopBounds(Fortran::lower::AbstractConverter &converter,
+                              mlir::Location loc, CollapseClauseOps &ops,
+                              std::size_t loopVarTypeSize) {
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  // The types of lower bound, upper bound, and step are converted into the
+  // type of the loop variable if necessary.
+  mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
+  for (unsigned it = 0; it < (unsigned)ops.loopLBVar.size(); it++) {
+    ops.loopLBVar[it] =
+        firOpBuilder.createConvert(loc, loopVarType, ops.loopLBVar[it]);
+    ops.loopUBVar[it] =
+        firOpBuilder.createConvert(loc, loopVarType, ops.loopUBVar[it]);
+    ops.loopStepVar[it] =
+        firOpBuilder.createConvert(loc, loopVarType, ops.loopStepVar[it]);
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // ClauseProcessor unique clauses
 //===----------------------------------------------------------------------===//
 
-bool ClauseProcessor::processCollapse(
-    mlir::Location currentLocation, Fortran::lower::pft::Evaluation &eval,
-    llvm::SmallVectorImpl<mlir::Value> &lowerBound,
-    llvm::SmallVectorImpl<mlir::Value> &upperBound,
-    llvm::SmallVectorImpl<mlir::Value> &step,
-    llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &iv,
-    std::size_t &loopVarTypeSize) const {
+bool ClauseProcessor::processCollapse(mlir::Location currentLocation,
+                                      Fortran::lower::pft::Evaluation &eval,
+                                      CollapseClauseOps &ops) const {
   bool found = false;
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
@@ -259,7 +272,7 @@ bool ClauseProcessor::processCollapse(
     found = true;
   }
 
-  loopVarTypeSize = 0;
+  std::size_t loopVarTypeSize = 0;
   do {
     Fortran::lower::pft::Evaluation *doLoop =
         &doConstructEval->getFirstNestedEvaluation();
@@ -271,18 +284,18 @@ bool ClauseProcessor::processCollapse(
         std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
     assert(bounds && "Expected bounds for worksharing do loop");
     Fortran::lower::StatementContext stmtCtx;
-    lowerBound.push_back(fir::getBase(converter.genExprValue(
+    ops.loopLBVar.push_back(fir::getBase(converter.genExprValue(
         *Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
-    upperBound.push_back(fir::getBase(converter.genExprValue(
+    ops.loopUBVar.push_back(fir::getBase(converter.genExprValue(
         *Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
     if (bounds->step) {
-      step.push_back(fir::getBase(converter.genExprValue(
+      ops.loopStepVar.push_back(fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
     } else { // If `step` is not present, assume it as `1`.
- step.push_back(firOpBuilder.createIntegerConstant( + ops.loopStepVar.push_back(firOpBuilder.createIntegerConstant( currentLocation, firOpBuilder.getIntegerType(32), 1)); } - iv.push_back(bounds->name.thing.symbol); + ops.loopIV.push_back(bounds->name.thing.symbol); loopVarTypeSize = std::max(loopVarTypeSize, bounds->name.thing.symbol->GetUltimate().size()); collapseValue--; @@ -290,6 +303,9 @@ bool ClauseProcessor::processCollapse( &*std::next(doConstructEval->getNestedEvaluations().begin()); } while (collapseValue > 0); + if (found) + convertLoopBounds(converter, currentLocation, ops, loopVarTypeSize); + return found; } @@ -316,7 +332,7 @@ bool ClauseProcessor::processDefault() const { } bool ClauseProcessor::processDevice(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + DeviceClauseOps &result) const { const Fortran::parser::CharBlock *source = nullptr; if (auto *deviceClause = findUniqueClause(&source)) { mlir::Location clauseLocation = converter.genLocation(*source); @@ -330,26 +346,26 @@ bool ClauseProcessor::processDevice(Fortran::lower::StatementContext &stmtCtx, } if (const auto *deviceExpr = Fortran::semantics::GetExpr( std::get(deviceClause->v.t))) { - result = fir::getBase(converter.genExprValue(*deviceExpr, stmtCtx)); + result.deviceVar = + fir::getBase(converter.genExprValue(*deviceExpr, stmtCtx)); } return true; } return false; } -bool ClauseProcessor::processDeviceType( - mlir::omp::DeclareTargetDeviceType &result) const { +bool ClauseProcessor::processDeviceType(DeviceTypeClauseOps &result) const { if (auto *deviceTypeClause = findUniqueClause()) { // Case: declare target ... device_type(any | host | nohost) switch (deviceTypeClause->v.v) { case Fortran::parser::OmpDeviceTypeClause::Type::Nohost: - result = mlir::omp::DeclareTargetDeviceType::nohost; + result.deviceType = mlir::omp::DeclareTargetDeviceType::nohost; break; case Fortran::parser::OmpDeviceTypeClause::Type::Host: - result = mlir::omp::DeclareTargetDeviceType::host; + result.deviceType = mlir::omp::DeclareTargetDeviceType::host; break; case Fortran::parser::OmpDeviceTypeClause::Type::Any: - result = mlir::omp::DeclareTargetDeviceType::any; + result.deviceType = mlir::omp::DeclareTargetDeviceType::any; break; } return true; @@ -358,7 +374,7 @@ bool ClauseProcessor::processDeviceType( } bool ClauseProcessor::processFinal(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + FinalClauseOps &result) const { const Fortran::parser::CharBlock *source = nullptr; if (auto *finalClause = findUniqueClause(&source)) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -366,38 +382,38 @@ bool ClauseProcessor::processFinal(Fortran::lower::StatementContext &stmtCtx, mlir::Value finalVal = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(finalClause->v), stmtCtx)); - result = firOpBuilder.createConvert(clauseLocation, - firOpBuilder.getI1Type(), finalVal); + result.finalVar = firOpBuilder.createConvert( + clauseLocation, firOpBuilder.getI1Type(), finalVal); return true; } return false; } -bool ClauseProcessor::processHint(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processHint(HintClauseOps &result) const { if (auto *hintClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); const auto *expr = Fortran::semantics::GetExpr(hintClause->v); int64_t hintValue = *Fortran::evaluate::ToInt64(*expr); - result = firOpBuilder.getI64IntegerAttr(hintValue); + result.hintAttr = 
firOpBuilder.getI64IntegerAttr(hintValue); return true; } return false; } -bool ClauseProcessor::processMergeable(mlir::UnitAttr &result) const { - return markClauseOccurrence(result); +bool ClauseProcessor::processMergeable(MergeableClauseOps &result) const { + return markClauseOccurrence(result.mergeableAttr); } -bool ClauseProcessor::processNowait(mlir::UnitAttr &result) const { - return markClauseOccurrence(result); +bool ClauseProcessor::processNowait(NowaitClauseOps &result) const { + return markClauseOccurrence(result.nowaitAttr); } bool ClauseProcessor::processNumTeams(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + NumTeamsClauseOps &result) const { // TODO Get lower and upper bounds for num_teams when parser is updated to // accept both. if (auto *numTeamsClause = findUniqueClause()) { - result = fir::getBase(converter.genExprValue( + result.numTeamsUpperVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(numTeamsClause->v), stmtCtx)); return true; } @@ -405,17 +421,18 @@ bool ClauseProcessor::processNumTeams(Fortran::lower::StatementContext &stmtCtx, } bool ClauseProcessor::processNumThreads( - Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const { + Fortran::lower::StatementContext &stmtCtx, + NumThreadsClauseOps &result) const { if (auto *numThreadsClause = findUniqueClause()) { // OMPIRBuilder expects `NUM_THREADS` clause as a `Value`. - result = fir::getBase(converter.genExprValue( + result.numThreadsVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(numThreadsClause->v), stmtCtx)); return true; } return false; } -bool ClauseProcessor::processOrdered(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processOrdered(OrderedClauseOps &result) const { if (auto *orderedClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); int64_t orderedClauseValue = 0l; @@ -423,48 +440,45 @@ bool ClauseProcessor::processOrdered(mlir::IntegerAttr &result) const { const auto *expr = Fortran::semantics::GetExpr(orderedClause->v); orderedClauseValue = *Fortran::evaluate::ToInt64(*expr); } - result = firOpBuilder.getI64IntegerAttr(orderedClauseValue); + result.orderedAttr = firOpBuilder.getI64IntegerAttr(orderedClauseValue); return true; } return false; } bool ClauseProcessor::processPriority(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + PriorityClauseOps &result) const { if (auto *priorityClause = findUniqueClause()) { - result = fir::getBase(converter.genExprValue( + result.priorityVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(priorityClause->v), stmtCtx)); return true; } return false; } -bool ClauseProcessor::processProcBind( - mlir::omp::ClauseProcBindKindAttr &result) const { +bool ClauseProcessor::processProcBind(ProcBindClauseOps &result) const { if (auto *procBindClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - result = genProcBindKindAttr(firOpBuilder, procBindClause); + result.procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause); return true; } return false; } -bool ClauseProcessor::processSafelen(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processSafelen(SafelenClauseOps &result) const { if (auto *safelenClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); const auto *expr = Fortran::semantics::GetExpr(safelenClause->v); const std::optional safelenVal = Fortran::evaluate::ToInt64(*expr); 
- result = firOpBuilder.getI64IntegerAttr(*safelenVal); + result.safelenAttr = firOpBuilder.getI64IntegerAttr(*safelenVal); return true; } return false; } -bool ClauseProcessor::processSchedule( - mlir::omp::ClauseScheduleKindAttr &valAttr, - mlir::omp::ScheduleModifierAttr &modifierAttr, - mlir::UnitAttr &simdModifierAttr) const { +bool ClauseProcessor::processSchedule(Fortran::lower::StatementContext &stmtCtx, + ScheduleClauseOps &result) const { if (auto *scheduleClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::MLIRContext *context = firOpBuilder.getContext(); @@ -496,26 +510,21 @@ bool ClauseProcessor::processSchedule( getScheduleModifier(scheduleClause->v); if (scheduleModifier != mlir::omp::ScheduleModifier::none) - modifierAttr = + result.scheduleModAttr = mlir::omp::ScheduleModifierAttr::get(context, scheduleModifier); if (getSimdModifier(scheduleClause->v) != mlir::omp::ScheduleModifier::none) - simdModifierAttr = firOpBuilder.getUnitAttr(); + result.scheduleSimdAttr = firOpBuilder.getUnitAttr(); - valAttr = mlir::omp::ClauseScheduleKindAttr::get(context, scheduleKind); - return true; - } - return false; -} + result.scheduleValAttr = + mlir::omp::ClauseScheduleKindAttr::get(context, scheduleKind); -bool ClauseProcessor::processScheduleChunk( - Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const { - if (auto *scheduleClause = findUniqueClause()) { if (const auto &chunkExpr = std::get>( scheduleClause->v.t)) { if (const auto *expr = Fortran::semantics::GetExpr(*chunkExpr)) { - result = fir::getBase(converter.genExprValue(*expr, stmtCtx)); + result.scheduleChunkVar = + fir::getBase(converter.genExprValue(*expr, stmtCtx)); } } return true; @@ -523,48 +532,47 @@ bool ClauseProcessor::processScheduleChunk( return false; } -bool ClauseProcessor::processSimdlen(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processSimdlen(SimdlenClauseOps &result) const { if (auto *simdlenClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); const auto *expr = Fortran::semantics::GetExpr(simdlenClause->v); const std::optional simdlenVal = Fortran::evaluate::ToInt64(*expr); - result = firOpBuilder.getI64IntegerAttr(*simdlenVal); + result.simdlenAttr = firOpBuilder.getI64IntegerAttr(*simdlenVal); return true; } return false; } bool ClauseProcessor::processThreadLimit( - Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const { + Fortran::lower::StatementContext &stmtCtx, + ThreadLimitClauseOps &result) const { if (auto *threadLmtClause = findUniqueClause()) { - result = fir::getBase(converter.genExprValue( + result.threadLimitVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(threadLmtClause->v), stmtCtx)); return true; } return false; } -bool ClauseProcessor::processUntied(mlir::UnitAttr &result) const { - return markClauseOccurrence(result); +bool ClauseProcessor::processUntied(UntiedClauseOps &result) const { + return markClauseOccurrence(result.untiedAttr); } //===----------------------------------------------------------------------===// // ClauseProcessor repeatable clauses //===----------------------------------------------------------------------===// -bool ClauseProcessor::processAllocate( - llvm::SmallVectorImpl &allocatorOperands, - llvm::SmallVectorImpl &allocateOperands) const { +bool ClauseProcessor::processAllocate(AllocateClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Allocate *allocateClause, const 
Fortran::parser::CharBlock &) { - genAllocateClause(converter, allocateClause->v, allocatorOperands, - allocateOperands); + genAllocateClause(converter, allocateClause->v, result.allocatorVars, + result.allocateVars); }); } -bool ClauseProcessor::processCopyin() const { +bool ClauseProcessor::processCopyin(CopyinClauseOps &) const { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); @@ -709,10 +717,8 @@ createCopyFunc(mlir::Location loc, Fortran::lower::AbstractConverter &converter, return funcOp; } -bool ClauseProcessor::processCopyPrivate( - mlir::Location currentLocation, - llvm::SmallVectorImpl ©PrivateVars, - llvm::SmallVectorImpl ©PrivateFuncs) const { +bool ClauseProcessor::processCopyprivate(mlir::Location currentLocation, + CopyprivateClauseOps &result) const { auto addCopyPrivateVar = [&](Fortran::semantics::Symbol *sym) { mlir::Value symVal = converter.getSymbolAddress(*sym); auto declOp = symVal.getDefiningOp(); @@ -739,10 +745,10 @@ bool ClauseProcessor::processCopyPrivate( cpVar = alloca; } - copyPrivateVars.push_back(cpVar); + result.copyprivateVars.push_back(cpVar); mlir::func::FuncOp funcOp = createCopyFunc(currentLocation, converter, cpVar.getType(), attrs); - copyPrivateFuncs.push_back(mlir::SymbolRefAttr::get(funcOp)); + result.copyprivateFuncs.push_back(mlir::SymbolRefAttr::get(funcOp)); }; bool hasCopyPrivate = findRepeatableClause( @@ -765,9 +771,7 @@ bool ClauseProcessor::processCopyPrivate( return hasCopyPrivate; } -bool ClauseProcessor::processDepend( - llvm::SmallVectorImpl &dependTypeOperands, - llvm::SmallVectorImpl &dependOperands) const { +bool ClauseProcessor::processDepend(DependClauseOps &result) const { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); return findRepeatableClause( @@ -780,8 +784,8 @@ bool ClauseProcessor::processDepend( .t); mlir::omp::ClauseTaskDependAttr dependTypeOperand = genDependKindAttr(firOpBuilder, dependClause); - dependTypeOperands.insert(dependTypeOperands.end(), depVal.size(), - dependTypeOperand); + result.dependTypeAttrs.insert(result.dependTypeAttrs.end(), + depVal.size(), dependTypeOperand); for (const Fortran::parser::Designator &ompObject : depVal) { Fortran::semantics::Symbol *sym = nullptr; std::visit( @@ -803,14 +807,14 @@ bool ClauseProcessor::processDepend( }}, (ompObject).u); const mlir::Value variable = converter.getSymbolAddress(*sym); - dependOperands.push_back(variable); + result.dependVars.push_back(variable); } }); } bool ClauseProcessor::processIf( Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName, - mlir::Value &result) const { + IfClauseOps &result) const { bool found = false; findRepeatableClause( [&](const ClauseTy::If *ifClause, @@ -821,21 +825,21 @@ bool ClauseProcessor::processIf( // Assume that, at most, a single 'if' clause will be applicable to the // given directive. if (operand) { - result = operand; + result.ifVar = operand; found = true; } }); return found; } -bool ClauseProcessor::processLink( - llvm::SmallVectorImpl &result) const { +bool ClauseProcessor::processLink(EnterLinkToClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Link *linkClause, const Fortran::parser::CharBlock &) { // Case: declare target link(var1, var2)... 
- gatherFuncAndVarSyms( - linkClause->v, mlir::omp::DeclareTargetCaptureClause::link, result); + gatherFuncAndVarSyms(linkClause->v, + mlir::omp::DeclareTargetCaptureClause::link, + result.symbolAndClause); }); } @@ -863,14 +867,9 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc, return op; } -bool ClauseProcessor::processMap( - mlir::Location currentLocation, const llvm::omp::Directive &directive, - Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands, - llvm::SmallVectorImpl *mapSymTypes, - llvm::SmallVectorImpl *mapSymLocs, - llvm::SmallVectorImpl *mapSymbols) - const { +bool ClauseProcessor::processMap(mlir::Location currentLocation, + Fortran::lower::StatementContext &stmtCtx, + MapClauseOps &result) const { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); return findRepeatableClause( [&](const ClauseTy::Map *mapClause, @@ -946,100 +945,92 @@ bool ClauseProcessor::processMap( mapTypeBits), mlir::omp::VariableCaptureKind::ByRef, symAddr.getType()); - mapOperands.push_back(mapOp); - if (mapSymTypes) - mapSymTypes->push_back(symAddr.getType()); - if (mapSymLocs) - mapSymLocs->push_back(symAddr.getLoc()); - - if (mapSymbols) - mapSymbols->push_back(getOmpObjectSymbol(ompObject)); + result.mapVars.push_back(mapOp); + if (result.mapSymTypes) + result.mapSymTypes->push_back(symAddr.getType()); + if (result.mapSymLocs) + result.mapSymLocs->push_back(symAddr.getLoc()); + if (result.mapSymbols) + result.mapSymbols->push_back(getOmpObjectSymbol(ompObject)); } }); } bool ClauseProcessor::processTargetReduction( - llvm::SmallVector &reductionSymbols) - const { + TargetReductionClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Reduction *reductionClause, const Fortran::parser::CharBlock &) { ReductionProcessor rp; - rp.addReductionSym(reductionClause->v, reductionSymbols); + rp.addReductionSym(reductionClause->v, result.targetReductionSymbols); }); } -bool ClauseProcessor::processReduction( - mlir::Location currentLocation, - llvm::SmallVectorImpl &reductionVars, - llvm::SmallVectorImpl &reductionDeclSymbols, - llvm::SmallVectorImpl *reductionSymbols) - const { +bool ClauseProcessor::processReduction(mlir::Location currentLocation, + ReductionClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Reduction *reductionClause, const Fortran::parser::CharBlock &) { ReductionProcessor rp; rp.addReductionDecl(currentLocation, converter, reductionClause->v, - reductionVars, reductionDeclSymbols, - reductionSymbols); + result.reductionVars, result.reductionDeclSymbols, + result.reductionSymbols ? &*result.reductionSymbols + : nullptr); + result.reductionTypes.reserve(result.reductionVars.size()); + llvm::transform(result.reductionVars, + std::back_inserter(result.reductionTypes), + [](mlir::Value v) { return v.getType(); }); }); } -bool ClauseProcessor::processSectionsReduction( - mlir::Location currentLocation) const { +bool ClauseProcessor::processSectionsReduction(mlir::Location currentLocation, + ReductionClauseOps &) const { return findRepeatableClause( [&](const ClauseTy::Reduction *, const Fortran::parser::CharBlock &) { + // Either implement special handling or remove this method and use the + // generic processReduction() method instead. 
TODO(currentLocation, "OMPC_Reduction"); }); } -bool ClauseProcessor::processTo( - llvm::SmallVectorImpl &result) const { +bool ClauseProcessor::processTo(EnterLinkToClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::To *toClause, const Fortran::parser::CharBlock &) { // Case: declare target to(func, var1, var2)... gatherFuncAndVarSyms(toClause->v, - mlir::omp::DeclareTargetCaptureClause::to, result); + mlir::omp::DeclareTargetCaptureClause::to, + result.symbolAndClause); }); } -bool ClauseProcessor::processEnter( - llvm::SmallVectorImpl &result) const { +bool ClauseProcessor::processEnter(EnterLinkToClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Enter *enterClause, const Fortran::parser::CharBlock &) { // Case: declare target enter(func, var1, var2)... gatherFuncAndVarSyms(enterClause->v, mlir::omp::DeclareTargetCaptureClause::enter, - result); + result.symbolAndClause); }); } -bool ClauseProcessor::processUseDeviceAddr( - llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl &useDeviceSymbols) - const { +bool ClauseProcessor::processUseDeviceAddr(UseDeviceClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::UseDeviceAddr *devAddrClause, const Fortran::parser::CharBlock &) { - addUseDeviceClause(converter, devAddrClause->v, operands, - useDeviceTypes, useDeviceLocs, useDeviceSymbols); + addUseDeviceClause(converter, devAddrClause->v, + result.useDeviceAddrVars, result.useDeviceTypes, + result.useDeviceLocs, result.useDeviceSymbols); }); } -bool ClauseProcessor::processUseDevicePtr( - llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl &useDeviceSymbols) - const { +bool ClauseProcessor::processUseDevicePtr(UseDeviceClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::UseDevicePtr *devPtrClause, const Fortran::parser::CharBlock &) { - addUseDeviceClause(converter, devPtrClause->v, operands, useDeviceTypes, - useDeviceLocs, useDeviceSymbols); + addUseDeviceClause(converter, devPtrClause->v, result.useDevicePtrVars, + result.useDeviceTypes, result.useDeviceLocs, + result.useDeviceSymbols); }); } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index 0b91aca3d1344..c6b0b73dd1a56 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -13,6 +13,7 @@ #define FORTRAN_LOWER_CLAUASEPROCESSOR_H #include "DirectivesCommon.h" +#include "OperationClauses.h" #include "ReductionProcessor.h" #include "Utils.h" #include "flang/Lower/AbstractConverter.h" @@ -54,101 +55,62 @@ class ClauseProcessor { : converter(converter), semaCtx(semaCtx), clauses(clauses) {} // 'Unique' clauses: They can appear at most once in the clause list. 
- bool - processCollapse(mlir::Location currentLocation, - Fortran::lower::pft::Evaluation &eval, - llvm::SmallVectorImpl &lowerBound, - llvm::SmallVectorImpl &upperBound, - llvm::SmallVectorImpl &step, - llvm::SmallVectorImpl &iv, - std::size_t &loopVarTypeSize) const; + bool processCollapse(mlir::Location currentLocation, + Fortran::lower::pft::Evaluation &eval, + CollapseClauseOps &result) const; bool processDefault() const; bool processDevice(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processDeviceType(mlir::omp::DeclareTargetDeviceType &result) const; + DeviceClauseOps &result) const; + bool processDeviceType(DeviceTypeClauseOps &result) const; bool processFinal(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processHint(mlir::IntegerAttr &result) const; - bool processMergeable(mlir::UnitAttr &result) const; - bool processNowait(mlir::UnitAttr &result) const; + FinalClauseOps &result) const; + bool processHint(HintClauseOps &result) const; + bool processMergeable(MergeableClauseOps &result) const; + bool processNowait(NowaitClauseOps &result) const; bool processNumTeams(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; + NumTeamsClauseOps &result) const; bool processNumThreads(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processOrdered(mlir::IntegerAttr &result) const; + NumThreadsClauseOps &result) const; + bool processOrdered(OrderedClauseOps &result) const; bool processPriority(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processProcBind(mlir::omp::ClauseProcBindKindAttr &result) const; - bool processSafelen(mlir::IntegerAttr &result) const; - bool processSchedule(mlir::omp::ClauseScheduleKindAttr &valAttr, - mlir::omp::ScheduleModifierAttr &modifierAttr, - mlir::UnitAttr &simdModifierAttr) const; - bool processScheduleChunk(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processSimdlen(mlir::IntegerAttr &result) const; + PriorityClauseOps &result) const; + bool processProcBind(ProcBindClauseOps &result) const; + bool processSafelen(SafelenClauseOps &result) const; + bool processSchedule(Fortran::lower::StatementContext &stmtCtx, + ScheduleClauseOps &result) const; + bool processSimdlen(SimdlenClauseOps &result) const; bool processThreadLimit(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processUntied(mlir::UnitAttr &result) const; + ThreadLimitClauseOps &result) const; + bool processUntied(UntiedClauseOps &result) const; // 'Repeatable' clauses: They can appear multiple times in the clause list. 
- bool - processAllocate(llvm::SmallVectorImpl &allocatorOperands, - llvm::SmallVectorImpl &allocateOperands) const; - bool processCopyin() const; - bool processCopyPrivate( - mlir::Location currentLocation, - llvm::SmallVectorImpl ©PrivateVars, - llvm::SmallVectorImpl ©PrivateFuncs) const; - bool processDepend(llvm::SmallVectorImpl &dependTypeOperands, - llvm::SmallVectorImpl &dependOperands) const; - bool - processEnter(llvm::SmallVectorImpl &result) const; + bool processAllocate(AllocateClauseOps &result) const; + bool processCopyin(CopyinClauseOps &result) const; + bool processCopyprivate(mlir::Location currentLocation, + CopyprivateClauseOps &result) const; + bool processDepend(DependClauseOps &result) const; + bool processEnter(EnterLinkToClauseOps &result) const; bool processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName, - mlir::Value &result) const; - bool - processLink(llvm::SmallVectorImpl &result) const; + IfClauseOps &result) const; + bool processLink(EnterLinkToClauseOps &result) const; // This method is used to process a map clause. - // The optional parameters - mapSymTypes, mapSymLocs & mapSymbols are used to - // store the original type, location and Fortran symbol for the map operands. - // They may be used later on to create the block_arguments for some of the - // target directives that require it. bool processMap(mlir::Location currentLocation, - const llvm::omp::Directive &directive, Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands, - llvm::SmallVectorImpl *mapSymTypes = nullptr, - llvm::SmallVectorImpl *mapSymLocs = nullptr, - llvm::SmallVectorImpl - *mapSymbols = nullptr) const; - bool - processReduction(mlir::Location currentLocation, - llvm::SmallVectorImpl &reductionVars, - llvm::SmallVectorImpl &reductionDeclSymbols, - llvm::SmallVectorImpl - *reductionSymbols = nullptr) const; - bool processTargetReduction( - llvm::SmallVector &reductionSymbols) - const; - bool processSectionsReduction(mlir::Location currentLocation) const; - bool processTo(llvm::SmallVectorImpl &result) const; - bool - processUseDeviceAddr(llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl - &useDeviceSymbols) const; - bool - processUseDevicePtr(llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl - &useDeviceSymbols) const; + MapClauseOps &result) const; + bool processReduction(mlir::Location currentLocation, + ReductionClauseOps &result) const; + bool processTargetReduction(TargetReductionClauseOps &result) const; + bool processSectionsReduction(mlir::Location currentLocation, + ReductionClauseOps &result) const; + bool processTo(EnterLinkToClauseOps &result) const; + bool processUseDeviceAddr(UseDeviceClauseOps &result) const; + bool processUseDevicePtr(UseDeviceClauseOps &result) const; template bool processMotionClauses(Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands); + MapClauseOps &result); // Call this method for these clauses that should be supported but are not // implemented yet. 
It triggers a compilation error if any of the given @@ -189,8 +151,7 @@ class ClauseProcessor { template bool ClauseProcessor::processMotionClauses( - Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands) { + Fortran::lower::StatementContext &stmtCtx, MapClauseOps &result) { return findRepeatableClause( [&](const T *motionClause, const Fortran::parser::CharBlock &source) { mlir::Location clauseLocation = converter.genLocation(source); @@ -232,7 +193,7 @@ bool ClauseProcessor::processMotionClauses( mapTypeBits), mlir::omp::VariableCaptureKind::ByRef, symAddr.getType()); - mapOperands.push_back(mapOp); + result.mapVars.push_back(mapOp); } }); } diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index 717b8cc0276a3..5c27a91bd469f 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -213,21 +213,23 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { firOpBuilder.restoreInsertionPoint(unstructuredSectionsIP); } } - } else if (mlir::isa(op)) { - // Update the original variable just before exiting the worksharing - // loop. Conversion as follows: + } else if (mlir::isa(op)) { + // TODO Check that the change from WsLoopOp to LoopNestOp didn't + // break anything here. + // Update the original variable just before exiting the loop. Conversion + // as follows: // - // omp.wsloop { - // omp.wsloop { ... - // ... store - // store ===> %v = arith.addi %iv, %step - // omp.yield %cmp = %step < 0 ? %v < %ub : %v > %ub - // } fir.if %cmp { - // fir.store %v to %loopIV - // ^%lpv_update_blk: - // } - // omp.yield + // omp.loopnest { + // omp.loopnest { ... + // ... store + // store ===> %v = arith.addi %iv, %step + // omp.yield %cmp = %step < 0 ? %v < %ub : %v > %ub + // } fir.if %cmp { + // fir.store %v to %loopIV + // ^%lpv_update_blk: // } + // omp.yield + // } // // Only generate the compare once in presence of multiple LastPrivate @@ -242,8 +244,8 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { mlir::Value iv = op->getRegion(0).front().getArguments()[0]; mlir::Value ub = - mlir::dyn_cast(op).getUpperBound()[0]; - mlir::Value step = mlir::dyn_cast(op).getStep()[0]; + mlir::cast(op).getUpperBound()[0]; + mlir::Value step = mlir::cast(op).getStep()[0]; // v = iv + step // cmp = step < 0 ? v < ub : v > ub diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h index 9f7301df07598..c22d1e966df90 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h @@ -24,6 +24,7 @@ namespace omp { class DataSharingProcessor { public: + // TODO Replace with PrivateClauseOps. /// Collects all the information needed for delayed privatization. This can be /// used by ops with data-sharing clauses to properly generate their regions /// (e.g. 
add region arguments) and map the original SSA values to their diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index e499e16c19e04..fdb8ef4977bc9 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -38,38 +38,6 @@ using namespace Fortran::lower::omp; -//===----------------------------------------------------------------------===// -// Code generation helper functions -//===----------------------------------------------------------------------===// - -static Fortran::lower::pft::Evaluation * -getCollapsedLoopEval(Fortran::lower::pft::Evaluation &eval, int collapseValue) { - // Return the Evaluation of the innermost collapsed loop, or the current one - // if there was no COLLAPSE. - if (collapseValue == 0) - return &eval; - - Fortran::lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation(); - for (int i = 1; i < collapseValue; i++) { - // The nested evaluations should be DoConstructs (i.e. they should form - // a loop nest). Each DoConstruct is a tuple . - assert(curEval->isA()); - curEval = &*std::next(curEval->getNestedEvaluations().begin()); - } - return curEval; -} - -static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter, - Fortran::lower::pft::Evaluation &eval, - int collapseValue = 0) { - Fortran::lower::pft::Evaluation *curEval = - getCollapsedLoopEval(eval, collapseValue); - - for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations()) - converter.genEval(e); -} - //===----------------------------------------------------------------------===// // HostClausesInsertionGuard //===----------------------------------------------------------------------===// @@ -139,6 +107,113 @@ class HostClausesInsertionGuard { } }; +//===----------------------------------------------------------------------===// +// OpWithBodyGenInfo +//===----------------------------------------------------------------------===// + +struct OpWithBodyGenInfo { + /// A type for a code-gen callback function. This takes as argument the op for + /// which the code is being generated and returns the arguments of the op's + /// region. + using GenOMPRegionEntryCBFn = + std::function( + mlir::Operation *)>; + + OpWithBodyGenInfo(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + mlir::Location loc, Fortran::lower::pft::Evaluation &eval) + : converter(converter), semaCtx(semaCtx), loc(loc), eval(eval) {} + + OpWithBodyGenInfo &setGenNested(bool value) { + genNested = value; + return *this; + } + + OpWithBodyGenInfo &setOuterCombined(bool value) { + outerCombined = value; + return *this; + } + + OpWithBodyGenInfo &setClauses(const Fortran::parser::OmpClauseList *value) { + clauses = value; + return *this; + } + + OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) { + dsp = value; + return *this; + } + + OpWithBodyGenInfo & + setReductions(llvm::ArrayRef symbols, + llvm::ArrayRef types) { + reductionSymbols = symbols; + reductionTypes = types; + return *this; + } + + OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) { + genRegionEntryCB = value; + return *this; + } + + /// [inout] converter to use for the clauses. + Fortran::lower::AbstractConverter &converter; + /// [in] Semantics context + Fortran::semantics::SemanticsContext &semaCtx; + /// [in] location in source code. + mlir::Location loc; + /// [in] current PFT node/evaluation. 
+ Fortran::lower::pft::Evaluation &eval; + /// [in] whether to generate FIR for nested evaluations + bool genNested = true; + /// [in] is this an outer operation - prevents privatization. + bool outerCombined = false; + /// [in] list of clauses to process. + const Fortran::parser::OmpClauseList *clauses = nullptr; + /// [in] if provided, processes the construct's data-sharing attributes. + DataSharingProcessor *dsp = nullptr; + /// [in] if provided, list of reduction symbols + llvm::ArrayRef reductionSymbols; + /// [in] if provided, list of reduction types + llvm::ArrayRef reductionTypes; + /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block + /// is created in the region. + GenOMPRegionEntryCBFn genRegionEntryCB = nullptr; +}; + +//===----------------------------------------------------------------------===// +// Code generation helper functions +//===----------------------------------------------------------------------===// + +static Fortran::lower::pft::Evaluation * +getCollapsedLoopEval(Fortran::lower::pft::Evaluation &eval, int collapseValue) { + // Return the Evaluation of the innermost collapsed loop, or the current one + // if there was no COLLAPSE. + if (collapseValue == 0) + return &eval; + + Fortran::lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation(); + for (int i = 1; i < collapseValue; i++) { + // The nested evaluations should be DoConstructs (i.e. they should form + // a loop nest). Each DoConstruct is a tuple . + assert(curEval->isA()); + curEval = &*std::next(curEval->getNestedEvaluations().begin()); + } + return curEval; +} + +static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter, + Fortran::lower::pft::Evaluation &eval, + int collapseValue = 0) { + Fortran::lower::pft::Evaluation *curEval = + getCollapsedLoopEval(eval, collapseValue); + + for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations()) + converter.genEval(e); +} + static fir::GlobalOp globalInitialization( Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder, const Fortran::semantics::Symbol &sym, @@ -282,268 +357,80 @@ static void threadPrivatizeVars(Fortran::lower::AbstractConverter &converter, firOpBuilder.restoreInsertionPoint(insPt); } -static mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, - std::size_t loopVarTypeSize) { - // OpenMP runtime requires 32-bit or 64-bit loop variables. - loopVarTypeSize = loopVarTypeSize * 8; - if (loopVarTypeSize < 32) { - loopVarTypeSize = 32; - } else if (loopVarTypeSize > 64) { - loopVarTypeSize = 64; - mlir::emitWarning(converter.getCurrentLocation(), - "OpenMP loop iteration variable cannot have more than 64 " - "bits size and will be narrowed into 64 bits."); - } - assert((loopVarTypeSize == 32 || loopVarTypeSize == 64) && - "OpenMP loop iteration variable size must be transformed into 32-bit " - "or 64-bit"); - return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); -} +/// Create the body (block) for an OpenMP Operation. +/// +/// \param [in] op - the operation the body belongs to. +/// \param [in] info - options controlling code-gen for the construction. 
+template +static void createBodyOfOp(Op &op, const OpWithBodyGenInfo &info) { + fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); -static mlir::Operation * -createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, - mlir::Location loc, mlir::Value indexVal, - const Fortran::semantics::Symbol *sym) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); - firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); + auto insertMarker = [](fir::FirOpBuilder &builder) { + mlir::Value undef = builder.create(builder.getUnknownLoc(), + builder.getIndexType()); + return undef.getDefiningOp(); + }; - mlir::Type tempTy = converter.genType(*sym); - mlir::Value temp = firOpBuilder.create( - loc, tempTy, /*pinned=*/true, /*lengthParams=*/mlir::ValueRange{}, - /*shapeParams*/ mlir::ValueRange{}, - llvm::ArrayRef{ - fir::getAdaptToByRefAttr(firOpBuilder)}); - converter.bindSymbol(*sym, temp); - firOpBuilder.restoreInsertionPoint(insPt); - mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal); - mlir::Operation *storeOp = firOpBuilder.create( - loc, cvtVal, converter.getSymbolAddress(*sym)); - return storeOp; -} + // If an argument for the region is provided then create the block with that + // argument. Also update the symbol's address with the mlir argument value. + // e.g. For loops the argument is the induction variable. And all further + // uses of the induction variable should use this mlir value. + auto regionArgs = + [&]() -> llvm::SmallVector { + if (info.genRegionEntryCB != nullptr) { + return info.genRegionEntryCB(op); + } -static mlir::Value -calculateTripCount(Fortran::lower::AbstractConverter &converter, - mlir::Location loc, llvm::ArrayRef lbs, - llvm::ArrayRef ubs, - llvm::ArrayRef steps) { - using namespace mlir::arith; - assert(lbs.size() == ubs.size() && lbs.size() == steps.size() && - !lbs.empty() && "Invalid bounds or step"); + firOpBuilder.createBlock(&op.getRegion()); + return {}; + }(); + // Mark the earliest insertion point. + mlir::Operation *marker = insertMarker(firOpBuilder); - fir::FirOpBuilder &b = converter.getFirOpBuilder(); + // If it is an unstructured region and is not the outer region of a combined + // construct, create empty blocks for all evaluations. + if (info.eval.lowerAsUnstructured() && !info.outerCombined) + Fortran::lower::createEmptyRegionBlocks( + firOpBuilder, info.eval.getNestedEvaluations()); - // Get the bit width of an integer-like type. - auto widthOf = [](mlir::Type ty) -> unsigned { - if (mlir::isa(ty)) { - return mlir::IndexType::kInternalStorageBitWidth; - } - if (auto tyInt = mlir::dyn_cast(ty)) { - return tyInt.getWidth(); - } - llvm_unreachable("Unexpected type"); - }; + // Start with privatization, so that the lowering of the nested + // code will use the right symbols. + // TODO Check that nothing broke from replacing WsLoopOp and SimdLoopOp here. + constexpr bool isLoop = std::is_same_v; + bool privatize = info.clauses && !info.outerCombined; - // For a type that is either IntegerType or IndexType, return the - // equivalent IntegerType. In the former case this is a no-op. 
- auto asIntTy = [&](mlir::Type ty) -> mlir::IntegerType { - if (ty.isIndex()) { - return mlir::IntegerType::get(ty.getContext(), widthOf(ty)); + firOpBuilder.setInsertionPoint(marker); + std::optional tempDsp; + if (privatize) { + if (!info.dsp) { + tempDsp.emplace(info.converter, *info.clauses, info.eval); + tempDsp->processStep1(); } - assert(ty.isIntOrIndex() && "Unexpected type"); - return mlir::cast(ty); - }; + } - // For two given values, establish a common signless IntegerType - // that can represent any value of type of x and of type of y, - // and return the pair of x, y converted to the new type. - auto unifyToSignless = - [&](fir::FirOpBuilder &b, mlir::Value x, - mlir::Value y) -> std::pair { - auto tyX = asIntTy(x.getType()), tyY = asIntTy(y.getType()); - unsigned width = std::max(widthOf(tyX), widthOf(tyY)); - auto wideTy = mlir::IntegerType::get(b.getContext(), width, - mlir::IntegerType::Signless); - return std::make_pair(b.createConvert(loc, wideTy, x), - b.createConvert(loc, wideTy, y)); - }; + if constexpr (std::is_same_v) { + threadPrivatizeVars(info.converter, info.eval); + if (info.clauses) { + firOpBuilder.setInsertionPoint(marker); + CopyinClauseOps clauseOps; + ClauseProcessor(info.converter, info.semaCtx, *info.clauses) + .processCopyin(clauseOps); + } + } - // Start with signless i32 by default. - auto tripCount = b.createIntegerConstant(loc, b.getI32Type(), 1); - - for (auto [origLb, origUb, origStep] : llvm::zip(lbs, ubs, steps)) { - auto tmpS0 = b.createIntegerConstant(loc, origStep.getType(), 0); - auto [step, step0] = unifyToSignless(b, origStep, tmpS0); - auto reverseCond = b.create(loc, CmpIPredicate::slt, step, step0); - auto negStep = b.create(loc, step0, step); - mlir::Value absStep = b.create(loc, reverseCond, negStep, step); - - auto [lb, ub] = unifyToSignless(b, origLb, origUb); - auto start = b.create(loc, reverseCond, ub, lb); - auto end = b.create(loc, reverseCond, lb, ub); - - mlir::Value range = b.create(loc, end, start); - auto rangeCond = b.create(loc, CmpIPredicate::slt, end, start); - std::tie(range, absStep) = unifyToSignless(b, range, absStep); - // numSteps = (range /u absStep) + 1 - auto numSteps = - b.create(loc, b.create(loc, range, absStep), - b.createIntegerConstant(loc, range.getType(), 1)); - - auto trip0 = b.createIntegerConstant(loc, numSteps.getType(), 0); - auto loopTripCount = b.create(loc, rangeCond, trip0, numSteps); - auto [totalTC, thisTC] = unifyToSignless(b, tripCount, loopTripCount); - tripCount = b.create(loc, totalTC, thisTC); - } - - return tripCount; -} - -struct OpWithBodyGenInfo { - /// A type for a code-gen callback function. This takes as argument the op for - /// which the code is being generated and returns the arguments of the op's - /// region. 
- using GenOMPRegionEntryCBFn = - std::function( - mlir::Operation *)>; - - OpWithBodyGenInfo(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - mlir::Location loc, Fortran::lower::pft::Evaluation &eval) - : converter(converter), semaCtx(semaCtx), loc(loc), eval(eval) {} - - OpWithBodyGenInfo &setGenNested(bool value) { - genNested = value; - return *this; - } - - OpWithBodyGenInfo &setOuterCombined(bool value) { - outerCombined = value; - return *this; - } - - OpWithBodyGenInfo &setClauses(const Fortran::parser::OmpClauseList *value) { - clauses = value; - return *this; - } - - OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) { - dsp = value; - return *this; - } - - OpWithBodyGenInfo & - setReductions(llvm::SmallVector *value1, - llvm::SmallVector *value2) { - reductionSymbols = value1; - reductionTypes = value2; - return *this; - } - - OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) { - genRegionEntryCB = value; - return *this; - } - - /// [inout] converter to use for the clauses. - Fortran::lower::AbstractConverter &converter; - /// [in] Semantics context - Fortran::semantics::SemanticsContext &semaCtx; - /// [in] location in source code. - mlir::Location loc; - /// [in] current PFT node/evaluation. - Fortran::lower::pft::Evaluation &eval; - /// [in] whether to generate FIR for nested evaluations - bool genNested = true; - /// [in] is this an outer operation - prevents privatization. - bool outerCombined = false; - /// [in] list of clauses to process. - const Fortran::parser::OmpClauseList *clauses = nullptr; - /// [in] if provided, processes the construct's data-sharing attributes. - DataSharingProcessor *dsp = nullptr; - /// [in] if provided, list of reduction symbols - llvm::SmallVector *reductionSymbols = - nullptr; - /// [in] if provided, list of reduction types - llvm::SmallVector *reductionTypes = nullptr; - /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block - /// is created in the region. - GenOMPRegionEntryCBFn genRegionEntryCB = nullptr; -}; - -/// Create the body (block) for an OpenMP Operation. -/// -/// \param [in] op - the operation the body belongs to. -/// \param [in] info - options controlling code-gen for the construction. -template -static void createBodyOfOp(Op &op, OpWithBodyGenInfo &info) { - fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); - - auto insertMarker = [](fir::FirOpBuilder &builder) { - mlir::Value undef = builder.create(builder.getUnknownLoc(), - builder.getIndexType()); - return undef.getDefiningOp(); - }; - - // If an argument for the region is provided then create the block with that - // argument. Also update the symbol's address with the mlir argument value. - // e.g. For loops the argument is the induction variable. And all further - // uses of the induction variable should use this mlir value. - auto regionArgs = - [&]() -> llvm::SmallVector { - if (info.genRegionEntryCB != nullptr) { - return info.genRegionEntryCB(op); - } - - firOpBuilder.createBlock(&op.getRegion()); - return {}; - }(); - // Mark the earliest insertion point. - mlir::Operation *marker = insertMarker(firOpBuilder); - - // If it is an unstructured region and is not the outer region of a combined - // construct, create empty blocks for all evaluations. 
- if (info.eval.lowerAsUnstructured() && !info.outerCombined) - Fortran::lower::createEmptyRegionBlocks( - firOpBuilder, info.eval.getNestedEvaluations()); - - // Start with privatization, so that the lowering of the nested - // code will use the right symbols. - constexpr bool isLoop = std::is_same_v || - std::is_same_v; - bool privatize = info.clauses && !info.outerCombined; - - firOpBuilder.setInsertionPoint(marker); - std::optional tempDsp; - if (privatize) { - if (!info.dsp) { - tempDsp.emplace(info.converter, *info.clauses, info.eval); - tempDsp->processStep1(); - } - } - - if constexpr (std::is_same_v) { - threadPrivatizeVars(info.converter, info.eval); - if (info.clauses) { - firOpBuilder.setInsertionPoint(marker); - ClauseProcessor(info.converter, info.semaCtx, *info.clauses) - .processCopyin(); - } - } - - if (info.genNested) { - // genFIR(Evaluation&) tries to patch up unterminated blocks, causing - // a lot of complications for our approach if the terminator generation - // is delayed past this point. Insert a temporary terminator here, then - // delete it. - firOpBuilder.setInsertionPointToEnd(&op.getRegion().back()); - auto *temp = Fortran::lower::genOpenMPTerminator( - firOpBuilder, op.getOperation(), info.loc); - firOpBuilder.setInsertionPointAfter(marker); - genNestedEvaluations(info.converter, info.eval); - temp->erase(); - } + if (info.genNested) { + // genFIR(Evaluation&) tries to patch up unterminated blocks, causing + // a lot of complications for our approach if the terminator generation + // is delayed past this point. Insert a temporary terminator here, then + // delete it. + firOpBuilder.setInsertionPointToEnd(&op.getRegion().back()); + auto *temp = Fortran::lower::genOpenMPTerminator( + firOpBuilder, op.getOperation(), info.loc); + firOpBuilder.setInsertionPointAfter(marker); + genNestedEvaluations(info.converter, info.eval); + temp->erase(); + } // Get or create a unique exiting block from the given region, or // return nullptr if there is no exiting block. 
@@ -672,453 +559,43 @@ static void genBodyOfTargetDataOp(
   genNestedEvaluations(converter, eval);
 }

-template <typename OpTy, typename... Args>
-static OpTy genOpWithBody(OpWithBodyGenInfo &info, Args &&...args) {
-  auto op = info.converter.getFirOpBuilder().create<OpTy>(
-      info.loc, std::forward<Args>(args)...);
-  createBodyOfOp<OpTy>(op, info);
-  return op;
-}
-
-static mlir::omp::MasterOp
-genMasterOp(Fortran::lower::AbstractConverter &converter,
-            Fortran::semantics::SemanticsContext &semaCtx,
-            Fortran::lower::pft::Evaluation &eval, bool genNested,
-            mlir::Location currentLocation) {
-  return genOpWithBody<mlir::omp::MasterOp>(
-      OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
-          .setGenNested(genNested),
-      /*resultTypes=*/mlir::TypeRange());
-}
-
-static mlir::omp::OrderedRegionOp
-genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
-                   Fortran::semantics::SemanticsContext &semaCtx,
-                   Fortran::lower::pft::Evaluation &eval, bool genNested,
-                   mlir::Location currentLocation) {
-  return genOpWithBody<mlir::omp::OrderedRegionOp>(
-      OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
-          .setGenNested(genNested),
-      /*simd=*/false);
-}
-
-static bool evalHasSiblings(Fortran::lower::pft::Evaluation &eval) {
-  return eval.parent.visit(Fortran::common::visitors{
-      [&](const Fortran::lower::pft::Program &parent) {
-        return parent.getUnits().size() + parent.getCommonBlocks().size() > 1;
-      },
-      [&](const Fortran::lower::pft::Evaluation &parent) {
-        for (auto &sibling : *parent.evaluationList)
-          if (&sibling != &eval && !sibling.isEndStmt())
-            return true;
-
-        return false;
-      },
-      [&](const auto &parent) {
-        for (auto &sibling : parent.evaluationList)
-          if (&sibling != &eval && !sibling.isEndStmt())
-            return true;
+// This function creates a block for the body of the targetOp's region. It adds
+// all the symbols present in mapSymbols as block arguments to this block.
+static void genBodyOfTargetOp(
+    Fortran::lower::AbstractConverter &converter,
+    Fortran::semantics::SemanticsContext &semaCtx,
+    Fortran::lower::pft::Evaluation &eval, bool genNested,
+    mlir::omp::TargetOp &targetOp,
+    const llvm::SmallVector<mlir::Type> &mapSymTypes,
+    const llvm::SmallVector<mlir::Location> &mapSymLocs,
+    const llvm::SmallVector<const Fortran::semantics::Symbol *> &mapSymbols,
+    const mlir::Location &currentLocation) {
+  assert(mapSymTypes.size() == mapSymLocs.size());
-      return false;
-    }});
-}
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  mlir::Region &region = targetOp.getRegion();
-static mlir::omp::ParallelOp
-genParallelOp(Fortran::lower::AbstractConverter &converter,
-              Fortran::lower::SymMap &symTable,
-              Fortran::semantics::SemanticsContext &semaCtx,
-              Fortran::lower::pft::Evaluation &eval, bool genNested,
-              mlir::Location currentLocation,
-              const Fortran::parser::OmpClauseList &clauseList,
-              bool outerCombined = false) {
-  Fortran::lower::StatementContext stmtCtx;
-  mlir::Value ifClauseOperand, numThreadsClauseOperand;
-  mlir::omp::ClauseProcBindKindAttr procBindKindAttr;
-  llvm::SmallVector<mlir::Value> allocateOperands, allocatorOperands,
-      reductionVars;
-  llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
-  llvm::SmallVector<const Fortran::semantics::Symbol *> reductionSymbols;
-
-  ClauseProcessor cp(converter, semaCtx, clauseList);
-  cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Parallel,
-               ifClauseOperand);
-  cp.processProcBind(procBindKindAttr);
-  cp.processDefault();
-  cp.processAllocate(allocatorOperands, allocateOperands);
-  if (!outerCombined)
-    cp.processReduction(currentLocation, reductionVars, reductionDeclSymbols,
-                        &reductionSymbols);
+  auto *regionBlock =
+      firOpBuilder.createBlock(&region, {}, mapSymTypes, mapSymLocs);
-  llvm::SmallVector<mlir::Type> reductionTypes;
-  reductionTypes.reserve(reductionVars.size());
-  llvm::transform(reductionVars, std::back_inserter(reductionTypes),
-                  [](mlir::Value v) { return v.getType(); });
+  // Clones the `bounds` placing them inside the target region and returns them.
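The cloneBound helper introduced in the following lines exists because the target region is isolated from above: SSA values computed on the host cannot be referenced directly inside it, so side-effect-free defining operations are re-created in the region instead. A stand-alone sketch of the same idea, with hypothetical names (only valid for pure ops whose operands, if any, are already visible in the block):

    // Re-materialize a value inside an isolated block instead of capturing it.
    static mlir::Value rematerialize(mlir::Value v, mlir::Block &block) {
      mlir::Operation *def = v.getDefiningOp();
      assert(def && mlir::isMemoryEffectFree(def) && "expected a pure defining op");
      mlir::Operation *clone = def->clone(); // operands are not remapped here
      block.push_back(clone);
      return clone->getResult(0);
    }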
+ auto cloneBound = [&](mlir::Value bound) { + if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { + mlir::Operation *clonedOp = bound.getDefiningOp()->clone(); + regionBlock->push_back(clonedOp); + return clonedOp->getResult(0); - auto reductionCallback = [&](mlir::Operation *op) { - llvm::SmallVector locs(reductionVars.size(), - currentLocation); - auto *block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {}, - reductionTypes, locs); - for (auto [arg, prv] : - llvm::zip_equal(reductionSymbols, block->getArguments())) { - converter.bindSymbol(*arg, prv); } - return reductionSymbols; + TODO(converter.getCurrentLocation(), + "target map clause operand unsupported bound type"); }; - auto offloadModOp = - llvm::cast(*converter.getModuleOp()); - mlir::omp::TargetOp targetOp = - findParentTargetOp(converter.getFirOpBuilder()); - - bool mustEvalOutsideTarget = - targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); - if (mustEvalOutsideTarget) { - HostClausesInsertionGuard guard(converter.getFirOpBuilder()); - cp.processNumThreads(stmtCtx, numThreadsClauseOperand); - } else { - cp.processNumThreads(stmtCtx, numThreadsClauseOperand); - } - - OpWithBodyGenInfo genInfo = - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setOuterCombined(outerCombined) - .setClauses(&clauseList) - .setReductions(&reductionSymbols, &reductionTypes) - .setGenRegionEntryCb(reductionCallback); - - if (!enableDelayedPrivatization) { - auto parallelOp = genOpWithBody( - genInfo, - /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, - numThreadsClauseOperand, allocateOperands, allocatorOperands, - reductionVars, - reductionDeclSymbols.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - reductionDeclSymbols), - procBindKindAttr, /*private_vars=*/llvm::SmallVector{}, - /*privatizers=*/nullptr); - - if (mustEvalOutsideTarget) { - if (numThreadsClauseOperand) - targetOp.getNumThreadsMutable().assign(numThreadsClauseOperand); - } else { - if (numThreadsClauseOperand) - parallelOp.getNumThreadsVarMutable().assign(numThreadsClauseOperand); - } - - return parallelOp; - } - - bool privatize = !outerCombined; - DataSharingProcessor dsp(converter, clauseList, eval, - /*useDelayedPrivatization=*/true, &symTable); - - if (privatize) - dsp.processStep1(); - - const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo(); - - auto genRegionEntryCB = [&](mlir::Operation *op) { - auto parallelOp = llvm::cast(op); - - llvm::SmallVector reductionLocs(reductionVars.size(), - currentLocation); - - mlir::OperandRange privateVars = parallelOp.getPrivateVars(); - mlir::Region ®ion = parallelOp.getRegion(); - - llvm::SmallVector privateVarTypes = reductionTypes; - privateVarTypes.reserve(privateVarTypes.size() + privateVars.size()); - llvm::transform(privateVars, std::back_inserter(privateVarTypes), - [](mlir::Value v) { return v.getType(); }); - - llvm::SmallVector privateVarLocs = reductionLocs; - privateVarLocs.reserve(privateVarLocs.size() + privateVars.size()); - llvm::transform(privateVars, std::back_inserter(privateVarLocs), - [](mlir::Value v) { return v.getLoc(); }); - - converter.getFirOpBuilder().createBlock(®ion, /*insertPt=*/{}, - privateVarTypes, privateVarLocs); - - llvm::SmallVector allSymbols = - reductionSymbols; - allSymbols.append(delayedPrivatizationInfo.symbols); - for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { - converter.bindSymbol(*arg, prv); - } - - return 
allSymbols; - }; - - // TODO Merge with the reduction CB. - genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); - - llvm::SmallVector privatizers( - delayedPrivatizationInfo.privatizers.begin(), - delayedPrivatizationInfo.privatizers.end()); - - auto parallelOp = genOpWithBody( - genInfo, - /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, - /*num_threads_var=*/nullptr, allocateOperands, allocatorOperands, - reductionVars, - reductionDeclSymbols.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - reductionDeclSymbols), - procBindKindAttr, delayedPrivatizationInfo.originalAddresses, - delayedPrivatizationInfo.privatizers.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - privatizers)); - - if (mustEvalOutsideTarget) { - if (numThreadsClauseOperand) - targetOp.getNumThreadsMutable().assign(numThreadsClauseOperand); - } else { - if (numThreadsClauseOperand) - parallelOp.getNumThreadsVarMutable().assign(numThreadsClauseOperand); - } - - return parallelOp; -} - -static mlir::omp::SectionOp -genSectionOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList §ionsClauseList) { - // Currently only private/firstprivate clause is handled, and - // all privatization is done within `omp.section` operations. - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(§ionsClauseList)); -} - -static mlir::omp::SingleOp -genSingleOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &beginClauseList, - const Fortran::parser::OmpClauseList &endClauseList) { - llvm::SmallVector allocateOperands, allocatorOperands; - llvm::SmallVector copyPrivateVars; - llvm::SmallVector copyPrivateFuncs; - mlir::UnitAttr nowaitAttr; - - ClauseProcessor cp(converter, semaCtx, beginClauseList); - cp.processAllocate(allocatorOperands, allocateOperands); - - ClauseProcessor ecp(converter, semaCtx, endClauseList); - ecp.processNowait(nowaitAttr); - ecp.processCopyPrivate(currentLocation, copyPrivateVars, copyPrivateFuncs); - - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(&beginClauseList), - allocateOperands, allocatorOperands, copyPrivateVars, - copyPrivateFuncs.empty() - ? 
nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - copyPrivateFuncs), - nowaitAttr); -} - -static mlir::omp::TaskOp -genTaskOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, finalClauseOperand, priorityClauseOperand; - mlir::UnitAttr untiedAttr, mergeableAttr; - llvm::SmallVector dependTypeOperands; - llvm::SmallVector allocateOperands, allocatorOperands, - dependOperands; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Task, - ifClauseOperand); - cp.processAllocate(allocatorOperands, allocateOperands); - cp.processDefault(); - cp.processFinal(stmtCtx, finalClauseOperand); - cp.processUntied(untiedAttr); - cp.processMergeable(mergeableAttr); - cp.processPriority(stmtCtx, priorityClauseOperand); - cp.processDepend(dependTypeOperands, dependOperands); - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_task); - - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(&clauseList), - ifClauseOperand, finalClauseOperand, untiedAttr, mergeableAttr, - /*in_reduction_vars=*/mlir::ValueRange(), - /*in_reductions=*/nullptr, priorityClauseOperand, - dependTypeOperands.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - dependTypeOperands), - dependOperands, allocateOperands, allocatorOperands); -} - -static mlir::omp::TaskGroupOp -genTaskGroupOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - llvm::SmallVector allocateOperands, allocatorOperands; - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processAllocate(allocatorOperands, allocateOperands); - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_taskgroup); - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(&clauseList), - /*task_reduction_vars=*/mlir::ValueRange(), - /*task_reductions=*/nullptr, allocateOperands, allocatorOperands); -} - -static mlir::omp::DataOp -genDataOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, deviceOperand; - llvm::SmallVector mapOperands, devicePtrOperands, - deviceAddrOperands; - llvm::SmallVector useDeviceTypes; - llvm::SmallVector useDeviceLocs; - llvm::SmallVector useDeviceSymbols; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetData, - ifClauseOperand); - cp.processDevice(stmtCtx, deviceOperand); - cp.processUseDevicePtr(devicePtrOperands, useDeviceTypes, useDeviceLocs, - useDeviceSymbols); - cp.processUseDeviceAddr(deviceAddrOperands, useDeviceTypes, useDeviceLocs, - useDeviceSymbols); - cp.processMap(currentLocation, llvm::omp::Directive::OMPD_target_data, - stmtCtx, mapOperands); - - auto dataOp = 
converter.getFirOpBuilder().create( - currentLocation, ifClauseOperand, deviceOperand, devicePtrOperands, - deviceAddrOperands, mapOperands); - genBodyOfTargetDataOp(converter, semaCtx, eval, genNested, dataOp, - useDeviceTypes, useDeviceLocs, useDeviceSymbols, - currentLocation); - return dataOp; -} - -template -static OpTy -genEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, deviceOperand; - mlir::UnitAttr nowaitAttr; - llvm::SmallVector mapOperands, dependOperands; - llvm::SmallVector dependTypeOperands; - - Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName; - // GCC 9.3.0 emits a (probably) bogus warning about an unused variable. - [[maybe_unused]] llvm::omp::Directive directive; - if constexpr (std::is_same_v) { - directiveName = - Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetEnterData; - directive = llvm::omp::Directive::OMPD_target_enter_data; - } else if constexpr (std::is_same_v) { - directiveName = - Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetExitData; - directive = llvm::omp::Directive::OMPD_target_exit_data; - } else if constexpr (std::is_same_v) { - directiveName = - Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetUpdate; - directive = llvm::omp::Directive::OMPD_target_update; - } else { - return nullptr; - } - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(directiveName, ifClauseOperand); - cp.processDevice(stmtCtx, deviceOperand); - cp.processDepend(dependTypeOperands, dependOperands); - cp.processNowait(nowaitAttr); - - if constexpr (std::is_same_v) { - cp.processMotionClauses(stmtCtx, - mapOperands); - cp.processMotionClauses(stmtCtx, - mapOperands); - - } else { - cp.processMap(currentLocation, directive, stmtCtx, mapOperands); - } - - return firOpBuilder.create( - currentLocation, ifClauseOperand, deviceOperand, - dependTypeOperands.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - dependTypeOperands), - dependOperands, nowaitAttr, mapOperands); -} - -// This functions creates a block for the body of the targetOp's region. It adds -// all the symbols present in mapSymbols as block arguments to this block. -static void genBodyOfTargetOp( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::omp::TargetOp &targetOp, - const llvm::SmallVector &mapSymTypes, - const llvm::SmallVector &mapSymLocs, - const llvm::SmallVector &mapSymbols, - const mlir::Location ¤tLocation) { - assert(mapSymTypes.size() == mapSymLocs.size()); - - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::Region ®ion = targetOp.getRegion(); - - auto *regionBlock = - firOpBuilder.createBlock(®ion, {}, mapSymTypes, mapSymLocs); - - // Clones the `bounds` placing them inside the target region and returns them. 
- auto cloneBound = [&](mlir::Value bound) { - if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { - mlir::Operation *clonedOp = bound.getDefiningOp()->clone(); - regionBlock->push_back(clonedOp); - return clonedOp->getResult(0); - } - TODO(converter.getCurrentLocation(), - "target map clause operand unsupported bound type"); - }; - - auto cloneBounds = [cloneBound](llvm::ArrayRef bounds) { - llvm::SmallVector clonedBounds; - for (mlir::Value bound : bounds) - clonedBounds.emplace_back(cloneBound(bound)); - return clonedBounds; - }; + auto cloneBounds = [cloneBound](llvm::ArrayRef bounds) { + llvm::SmallVector clonedBounds; + for (mlir::Value bound : bounds) + clonedBounds.emplace_back(cloneBound(bound)); + return clonedBounds; + }; // Bind the symbols to their corresponding block arguments. for (auto [argIndex, argSymbol] : llvm::enumerate(mapSymbols)) { @@ -1187,54 +664,1053 @@ static void genBodyOfTargetOp( firOpBuilder.createTemporary(val.getLoc(), val.getType()); firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); - llvm::SmallVector bounds; - std::stringstream name; - firOpBuilder.setInsertionPoint(targetOp); - mlir::Value mapOp = createMapInfoOp( - firOpBuilder, copyVal.getLoc(), copyVal, mlir::Value{}, name.str(), - bounds, llvm::SmallVector{}, - static_cast< - std::underlying_type_t>( - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), - mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); - targetOp.getMapOperandsMutable().append(mapOp); - mlir::Value clonedValArg = - region.addArgument(copyVal.getType(), copyVal.getLoc()); - firOpBuilder.setInsertionPointToStart(regionBlock); - auto loadOp = firOpBuilder.create(clonedValArg.getLoc(), - clonedValArg); - val.replaceUsesWithIf( - loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) { - return use.getOwner()->getBlock() == regionBlock; - }); - firOpBuilder.setInsertionPoint(regionBlock, savedIP); - } + llvm::SmallVector bounds; + std::stringstream name; + firOpBuilder.setInsertionPoint(targetOp); + mlir::Value mapOp = createMapInfoOp( + firOpBuilder, copyVal.getLoc(), copyVal, mlir::Value{}, name.str(), + bounds, llvm::SmallVector{}, + static_cast< + std::underlying_type_t>( + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), + mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); + targetOp.getMapOperandsMutable().append(mapOp); + mlir::Value clonedValArg = + region.addArgument(copyVal.getType(), copyVal.getLoc()); + firOpBuilder.setInsertionPointToStart(regionBlock); + auto loadOp = firOpBuilder.create(clonedValArg.getLoc(), + clonedValArg); + val.replaceUsesWithIf( + loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) { + return use.getOwner()->getBlock() == regionBlock; + }); + firOpBuilder.setInsertionPoint(regionBlock, savedIP); + } + } + valuesDefinedAbove.clear(); + mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); + } + + // Insert dummy instruction to remember the insertion position. The + // marker will be deleted since there are not uses. + // In the HLFIR flow there are hlfir.declares inserted above while + // setting block arguments. + mlir::Value undefMarker = firOpBuilder.create( + targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); + + // Create blocks for unstructured regions. This has to be done since + // blocks are initially allocated with the function as the parent region. 
+ if (eval.lowerAsUnstructured()) { + Fortran::lower::createEmptyRegionBlocks( + firOpBuilder, eval.getNestedEvaluations()); + } + + firOpBuilder.create(currentLocation); + + // Create the insertion point after the marker. + firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); + if (genNested) + genNestedEvaluations(converter, eval); +} + +template +static OpTy genOpWithBody(const OpWithBodyGenInfo &info, Args &&...args) { + auto op = info.converter.getFirOpBuilder().create( + info.loc, std::forward(args)...); + createBodyOfOp(op, info); + return op; +} + +static mlir::Value +calculateTripCount(Fortran::lower::AbstractConverter &converter, + mlir::Location loc, llvm::ArrayRef lbs, + llvm::ArrayRef ubs, + llvm::ArrayRef steps) { + using namespace mlir::arith; + assert(lbs.size() == ubs.size() && lbs.size() == steps.size() && + !lbs.empty() && "Invalid bounds or step"); + + fir::FirOpBuilder &b = converter.getFirOpBuilder(); + + // Get the bit width of an integer-like type. + auto widthOf = [](mlir::Type ty) -> unsigned { + if (mlir::isa(ty)) { + return mlir::IndexType::kInternalStorageBitWidth; + } + if (auto tyInt = mlir::dyn_cast(ty)) { + return tyInt.getWidth(); + } + llvm_unreachable("Unexpected type"); + }; + + // For a type that is either IntegerType or IndexType, return the + // equivalent IntegerType. In the former case this is a no-op. + auto asIntTy = [&](mlir::Type ty) -> mlir::IntegerType { + if (ty.isIndex()) { + return mlir::IntegerType::get(ty.getContext(), widthOf(ty)); + } + assert(ty.isIntOrIndex() && "Unexpected type"); + return mlir::cast(ty); + }; + + // For two given values, establish a common signless IntegerType + // that can represent any value of type of x and of type of y, + // and return the pair of x, y converted to the new type. + auto unifyToSignless = + [&](fir::FirOpBuilder &b, mlir::Value x, + mlir::Value y) -> std::pair { + auto tyX = asIntTy(x.getType()), tyY = asIntTy(y.getType()); + unsigned width = std::max(widthOf(tyX), widthOf(tyY)); + auto wideTy = mlir::IntegerType::get(b.getContext(), width, + mlir::IntegerType::Signless); + return std::make_pair(b.createConvert(loc, wideTy, x), + b.createConvert(loc, wideTy, y)); + }; + + // Start with signless i32 by default. 
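Each dimension handled by the loop that follows contributes (range /u |step|) + 1 iterations, with the bounds swapped when the step is negative and a count of zero for an empty range; the collapsed trip count is the product over all dimensions. A scalar model of that arithmetic as a hypothetical helper (illustrative only):

    #include <cstdint>

    // One dimension of the trip-count computation, on plain integers.
    static uint64_t dimTripCount(int64_t lb, int64_t ub, int64_t step) {
      uint64_t absStep = step < 0 ? uint64_t(-step) : uint64_t(step);
      int64_t start = step < 0 ? ub : lb;
      int64_t end = step < 0 ? lb : ub;
      if (end < start)
        return 0;                                 // empty iteration space
      return uint64_t(end - start) / absStep + 1; // unsigned divide, then +1
    }
    // e.g. dimTripCount(1, 10, 3) == 4; counts are multiplied across dimensions.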
+ auto tripCount = b.createIntegerConstant(loc, b.getI32Type(), 1); + + for (auto [origLb, origUb, origStep] : llvm::zip(lbs, ubs, steps)) { + auto tmpS0 = b.createIntegerConstant(loc, origStep.getType(), 0); + auto [step, step0] = unifyToSignless(b, origStep, tmpS0); + auto reverseCond = b.create(loc, CmpIPredicate::slt, step, step0); + auto negStep = b.create(loc, step0, step); + mlir::Value absStep = b.create(loc, reverseCond, negStep, step); + + auto [lb, ub] = unifyToSignless(b, origLb, origUb); + auto start = b.create(loc, reverseCond, ub, lb); + auto end = b.create(loc, reverseCond, lb, ub); + + mlir::Value range = b.create(loc, end, start); + auto rangeCond = b.create(loc, CmpIPredicate::slt, end, start); + std::tie(range, absStep) = unifyToSignless(b, range, absStep); + // numSteps = (range /u absStep) + 1 + auto numSteps = + b.create(loc, b.create(loc, range, absStep), + b.createIntegerConstant(loc, range.getType(), 1)); + + auto trip0 = b.createIntegerConstant(loc, numSteps.getType(), 0); + auto loopTripCount = b.create(loc, rangeCond, trip0, numSteps); + auto [totalTC, thisTC] = unifyToSignless(b, tripCount, loopTripCount); + tripCount = b.create(loc, totalTC, thisTC); + } + + return tripCount; +} + +static bool evalHasSiblings(Fortran::lower::pft::Evaluation &eval) { + return eval.parent.visit(Fortran::common::visitors{ + [&](const Fortran::lower::pft::Program &parent) { + return parent.getUnits().size() + parent.getCommonBlocks().size() > 1; + }, + [&](const Fortran::lower::pft::Evaluation &parent) { + for (auto &sibling : *parent.evaluationList) + if (&sibling != &eval && !sibling.isEndStmt()) + return true; + + return false; + }, + [&](const auto &parent) { + for (auto &sibling : parent.evaluationList) + if (&sibling != &eval && !sibling.isEndStmt()) + return true; + + return false; + }}); +} + +/// Extract the list of function and variable symbols affected by the given +/// 'declare target' directive and return the intended device type for them. 
+static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, + EnterLinkToClauseOps &enterLinkToClauseOps) { + + // The default capture type + DeviceTypeClauseOps deviceTypeClauseOps = { + mlir::omp::DeclareTargetDeviceType::any}; + const auto &spec = std::get( + declareTargetConstruct.t); + + if (const auto *objectList{ + Fortran::parser::Unwrap(spec.u)}) { + // Case: declare target(func, var1, var2) + gatherFuncAndVarSyms(*objectList, mlir::omp::DeclareTargetCaptureClause::to, + enterLinkToClauseOps.symbolAndClause); + } else if (const auto *clauseList{ + Fortran::parser::Unwrap( + spec.u)}) { + if (clauseList->v.empty()) { + // Case: declare target, implicit capture of function + enterLinkToClauseOps.symbolAndClause.emplace_back( + mlir::omp::DeclareTargetCaptureClause::to, + eval.getOwningProcedure()->getSubprogramSymbol()); + } + + ClauseProcessor cp(converter, semaCtx, *clauseList); + cp.processDeviceType(deviceTypeClauseOps); + cp.processEnter(enterLinkToClauseOps); + cp.processLink(enterLinkToClauseOps); + cp.processTo(enterLinkToClauseOps); + cp.processTODO( + converter.getCurrentLocation(), + llvm::omp::Directive::OMPD_declare_target); + } + + return deviceTypeClauseOps.deviceType; +} + +static void collectDeferredDeclareTargets( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, + llvm::SmallVectorImpl + &deferredDeclareTarget) { + EnterLinkToClauseOps clauseOps; + mlir::omp::DeclareTargetDeviceType devType = getDeclareTargetInfo( + converter, semaCtx, eval, declareTargetConstruct, clauseOps); + // Return the device type only if at least one of the targets for the + // directive is a function or subroutine + mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + + for (const DeclareTargetCapturePair &symClause : clauseOps.symbolAndClause) { + mlir::Operation *op = mod.lookupSymbol(converter.mangleName( + std::get(symClause))); + + if (!op) { + deferredDeclareTarget.push_back( + {std::get<0>(symClause), devType, std::get<1>(symClause)}); + } + } +} + +static std::optional +getDeclareTargetFunctionDevice( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct + &declareTargetConstruct) { + EnterLinkToClauseOps clauseOps; + mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo( + converter, semaCtx, eval, declareTargetConstruct, clauseOps); + + // Return the device type only if at least one of the targets for the + // directive is a function or subroutine + mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + for (const DeclareTargetCapturePair &symClause : clauseOps.symbolAndClause) { + mlir::Operation *op = mod.lookupSymbol(converter.mangleName( + std::get(symClause))); + + if (mlir::isa_and_nonnull(op)) + return deviceType; + } + + return std::nullopt; +} + +static mlir::Operation * +createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, + mlir::Location loc, mlir::Value indexVal, + const Fortran::semantics::Symbol *sym) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + 
mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); + firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); + + mlir::Type tempTy = converter.genType(*sym); + mlir::Value temp = firOpBuilder.create( + loc, tempTy, /*pinned=*/true, /*lengthParams=*/mlir::ValueRange{}, + /*shapeParams*/ mlir::ValueRange{}, + llvm::ArrayRef{ + fir::getAdaptToByRefAttr(firOpBuilder)}); + converter.bindSymbol(*sym, temp); + firOpBuilder.restoreInsertionPoint(insPt); + mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal); + mlir::Operation *storeOp = firOpBuilder.create( + loc, cvtVal, converter.getSymbolAddress(*sym)); + return storeOp; +} + +static void +genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, + mlir::Location &loc, + llvm::ArrayRef args) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + auto ®ion = op->getRegion(0); + + std::size_t loopVarTypeSize = 0; + for (const Fortran::semantics::Symbol *arg : args) + loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); + mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + llvm::SmallVector tiv(args.size(), loopVarType); + llvm::SmallVector locs(args.size(), loc); + firOpBuilder.createBlock(®ion, {}, tiv, locs); + // The argument is not currently in memory, so make a temporary for the + // argument, and store it there, then bind that location to the argument. + mlir::Operation *storeOp = nullptr; + for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { + mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex)); + storeOp = + createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); + } + firOpBuilder.setInsertionPointAfter(storeOp); +} + +static void genReductionVars( + mlir::Operation *op, Fortran::lower::AbstractConverter &converter, + mlir::Location &loc, + llvm::ArrayRef reductionArgs, + llvm::ArrayRef reductionTypes) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + llvm::SmallVector blockArgLocs(reductionArgs.size(), loc); + + mlir::Block *entryBlock = firOpBuilder.createBlock( + &op->getRegion(0), {}, reductionTypes, blockArgLocs); + + // Bind the reduction arguments to their block arguments + for (auto [arg, prv] : + llvm::zip_equal(reductionArgs, entryBlock->getArguments())) { + converter.bindSymbol(*arg, prv); + } +} + +//===----------------------------------------------------------------------===// +// Code generation functions for clauses +//===----------------------------------------------------------------------===// + +// TODO Try to compile, check privatization of simple wsloop/simdloop/distribute +// TODO Move common args and functions into a ConstructProcessor class + +static void +genCriticalDeclareClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + CriticalDeclareOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processHint(clauseOps); +} + +static void genDataClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, DataOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processDevice(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetData, + clauseOps); + 
cp.processMap(loc, stmtCtx, clauseOps); + cp.processUseDeviceAddr(clauseOps); + cp.processUseDevicePtr(clauseOps); +} + +static void genDistributeClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + DistributeOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_distribute); +} + +static void genEnterExitUpdateDataClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, mlir::Location loc, + Fortran::parser::OmpIfClause::DirectiveNameModifier directive, + EnterExitUpdateDataOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processDepend(clauseOps); + cp.processDevice(stmtCtx, clauseOps); + cp.processIf(directive, clauseOps); + cp.processNowait(clauseOps); + + if (directive == + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetUpdate) { + cp.processMotionClauses(stmtCtx, clauseOps); + cp.processMotionClauses(stmtCtx, + clauseOps); + } else { + cp.processMap(loc, stmtCtx, clauseOps); + } +} + +static void genFlushClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const std::optional &objects, + const std::optional> + &clauses, + mlir::Location loc, llvm::SmallVectorImpl &operandRange) { + if (objects) + genObjectList(*objects, converter, operandRange); + + if (clauses && clauses->size() > 0) + TODO(converter.getCurrentLocation(), "Handle OmpMemoryOrderClause"); +} + +static void genLoopNestClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + LoopNestOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processCollapse(loc, eval, clauseOps); +} + +static void +genOrderedRegionClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + OrderedRegionOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_ordered); +} + +static void genParallelClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, bool processReduction, + bool evalNumThreadsOutsideTarget, + ParallelOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processDefault(); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Parallel, + clauseOps); + cp.processProcBind(clauseOps); + + if (processReduction) + cp.processReduction(loc, clauseOps); + + if (evalNumThreadsOutsideTarget) { + HostClausesInsertionGuard guard(converter.getFirOpBuilder()); + cp.processNumThreads(stmtCtx, clauseOps); + } else { + cp.processNumThreads(stmtCtx, clauseOps); + } +} + +static void genSectionsClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + bool 
clausesFromBeginSections, + SectionsOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + if (clausesFromBeginSections) { + cp.processAllocate(clauseOps); + cp.processSectionsReduction(loc, clauseOps); + } else { + cp.processNowait(clauseOps); + } +} + +static void genSimdLoopClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + SimdLoopOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Simd, + clauseOps); + cp.processReduction(loc, clauseOps); + cp.processSafelen(clauseOps); + cp.processSimdlen(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_simd); +} + +static void genSingleClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &beginClauses, + const Fortran::parser::OmpClauseList &endClauses, + mlir::Location loc, SingleOpClauseOps &clauseOps) { + ClauseProcessor bcp(converter, semaCtx, beginClauses); + bcp.processAllocate(clauseOps); + + ClauseProcessor ecp(converter, semaCtx, endClauses); + ecp.processCopyprivate(loc, clauseOps); + ecp.processNowait(clauseOps); +} + +static void genTargetClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, bool processHostOnlyClauses, + bool processReduction, + TargetOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processDepend(clauseOps); + cp.processDevice(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Target, + clauseOps); + cp.processMap(loc, stmtCtx, clauseOps); + cp.processThreadLimit(stmtCtx, clauseOps); + + if (processHostOnlyClauses) + cp.processNowait(clauseOps); + + if (processReduction) + cp.processTargetReduction(clauseOps); + + cp.processTODO( + loc, llvm::omp::Directive::OMPD_target); +} + +static void genTaskClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, TaskOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processDefault(); + cp.processDepend(clauseOps); + cp.processFinal(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Task, + clauseOps); + cp.processMergeable(clauseOps); + cp.processPriority(stmtCtx, clauseOps); + cp.processUntied(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_task); +} + +static void genTaskGroupClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + TaskGroupOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_taskgroup); +} + +static void genTaskLoopClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + 
TaskLoopOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processDefault(); + cp.processFinal(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Taskloop, + clauseOps); + cp.processMergeable(clauseOps); + cp.processPriority(stmtCtx, clauseOps); + cp.processReduction(loc, clauseOps); + cp.processUntied(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_taskloop); +} + +static void genTaskWaitClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + TaskWaitOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_taskwait); +} + +static void genTeamsClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, bool evalNumTeamsOutsideTarget, + TeamsOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Teams, + clauseOps); + cp.processDefault(); + + if (evalNumTeamsOutsideTarget) { + HostClausesInsertionGuard guard(converter.getFirOpBuilder()); + cp.processNumTeams(stmtCtx, clauseOps); + cp.processThreadLimit(stmtCtx, clauseOps); + } else { + cp.processNumTeams(stmtCtx, clauseOps); + cp.processThreadLimit(stmtCtx, clauseOps); + } +} + +static void genWsLoopClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &beginClauses, + const Fortran::parser::OmpClauseList *endClauses, + mlir::Location loc, WsloopOpClauseOps &clauseOps) { + ClauseProcessor bcp(converter, semaCtx, beginClauses); + bcp.processOrdered(clauseOps); + bcp.processReduction(loc, clauseOps); + bcp.processSchedule(stmtCtx, clauseOps); + + if (endClauses) { + ClauseProcessor ecp(converter, semaCtx, *endClauses); + ecp.processNowait(clauseOps); + } + + bcp.processTODO( + loc, llvm::omp::Directive::OMPD_do); +} + +//===----------------------------------------------------------------------===// +// Code generation functions for leaf constructs +//===----------------------------------------------------------------------===// + +// TODO Pass OpClauseOps as arg to all genOp + +static mlir::omp::BarrierOp +genBarrierOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation) { + return converter.getFirOpBuilder().create( + currentLocation); +} + +static mlir::omp::CriticalOp +genCriticalOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + const std::optional &name) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + mlir::FlatSymbolRefAttr nameAttr; + + if (name.has_value()) { + CriticalDeclareOpClauseOps clauseOps; + genCriticalDeclareClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + std::string nameStr = name.value().ToString(); + mlir::ModuleOp module = firOpBuilder.getModule(); + auto 
global = module.lookupSymbol(nameStr); + if (!global) { + mlir::OpBuilder modBuilder(module.getBodyRegion()); + global = modBuilder.create( + currentLocation, firOpBuilder.getStringAttr(nameStr), + clauseOps.hintAttr); + } + nameAttr = mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(), + global.getSymName()); + } + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested), + nameAttr); +} + +static mlir::omp::DataOp +genDataOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + Fortran::lower::StatementContext stmtCtx; + DataOpClauseOps clauseOps; + genDataClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + clauseOps); + + auto dataOp = converter.getFirOpBuilder().create( + currentLocation, clauseOps.ifVar, clauseOps.deviceVar, + clauseOps.useDevicePtrVars, clauseOps.useDeviceAddrVars, + clauseOps.mapVars); + + genBodyOfTargetDataOp(converter, semaCtx, eval, genNested, dataOp, + clauseOps.useDeviceTypes, clauseOps.useDeviceLocs, + clauseOps.useDeviceSymbols, currentLocation); + return dataOp; +} + +static mlir::omp::DistributeOp +genDistributeOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + bool outerCombined = false) { + DistributeOpClauseOps clauseOps; + genDistributeClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(false) + .setOuterCombined(outerCombined) + .setClauses(&clauseList), + clauseOps.distScheduleStaticAttr, clauseOps.distScheduleChunkSizeVar, + clauseOps.allocateVars, clauseOps.allocatorVars, clauseOps.orderAttr, + isComposite ? converter.getFirOpBuilder().getUnitAttr() : nullptr); +} + +template +static OpTy +genEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + + Fortran::parser::OmpIfClause::DirectiveNameModifier directive; + if constexpr (std::is_same_v) { + directive = + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetEnterData; + } else if constexpr (std::is_same_v) { + directive = + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetExitData; + } else if constexpr (std::is_same_v) { + directive = + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetUpdate; + } else { + llvm_unreachable("Unexpected TARGET data construct"); + } + + EnterExitUpdateDataOpClauseOps clauseOps; + genEnterExitUpdateDataClauses(converter, semaCtx, stmtCtx, clauseList, + currentLocation, directive, clauseOps); + + return firOpBuilder.create( + currentLocation, clauseOps.ifVar, clauseOps.deviceVar, + clauseOps.dependTypeAttrs.empty() + ? 
nullptr + : firOpBuilder.getArrayAttr(clauseOps.dependTypeAttrs), + clauseOps.dependVars, clauseOps.nowaitAttr, clauseOps.mapVars); +} + +static mlir::omp::FlushOp +genFlushOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation, + const std::optional &objectList, + const std::optional> + &clauseList) { + llvm::SmallVector operandRange; + genFlushClauses(converter, semaCtx, objectList, clauseList, currentLocation, + operandRange); + + return converter.getFirOpBuilder().create( + converter.getCurrentLocation(), operandRange); +} + +static mlir::omp::LoopNestOp +genLoopNestOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + const LoopNestOpClauseOps &clauseOps, DataSharingProcessor &dsp) { + auto *nestedEval = + getCollapsedLoopEval(eval, Fortran::lower::getCollapseValue(clauseList)); + + auto ivCallback = [&](mlir::Operation *op) { + genLoopVars(op, converter, currentLocation, clauseOps.loopIV); + return clauseOps.loopIV; + }; + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setClauses(&clauseList) + .setDataSharingProcessor(&dsp) + .setGenRegionEntryCb(ivCallback) + .setGenNested(true), + clauseOps.loopLBVar, clauseOps.loopUBVar, clauseOps.loopStepVar, + /*inclusive=*/converter.getFirOpBuilder().getUnitAttr()); +} + +static mlir::omp::MasterOp +genMasterOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation) { + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested), + /*resultTypes=*/mlir::TypeRange()); +} + +static mlir::omp::OrderedRegionOp +genOrderedRegionOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + OrderedRegionOpClauseOps clauseOps; + genOrderedRegionClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + // TODO Store clauseOps.parLevelThreadsAttr in op. 
+ return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested), + clauseOps.parLevelSimdAttr); +} + +static mlir::omp::ParallelOp +genParallelOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + bool isComposite, mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + bool outerCombined = false) { + // TODO Distinguish between genParallelOp as block vs wrapper + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + ParallelOpClauseOps clauseOps; + clauseOps.reductionSymbols.emplace(); + + auto offloadModOp = + llvm::cast(*converter.getModuleOp()); + mlir::omp::TargetOp targetOp = findParentTargetOp(firOpBuilder); + + bool evalNumThreadsOutsideTarget = + targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); + + genParallelClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + /*processReduction=*/!outerCombined, + evalNumThreadsOutsideTarget, clauseOps); + + auto reductionCallback = [&](mlir::Operation *op) { + genReductionVars(op, converter, currentLocation, + *clauseOps.reductionSymbols, clauseOps.reductionTypes); + return *clauseOps.reductionSymbols; + }; + + OpWithBodyGenInfo genInfo = + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setOuterCombined(outerCombined) + .setClauses(&clauseList) + .setReductions(*clauseOps.reductionSymbols, clauseOps.reductionTypes) + .setGenRegionEntryCb(reductionCallback); + + if (!enableDelayedPrivatization) { + auto parallelOp = genOpWithBody( + genInfo, clauseOps.ifVar, /*num_threads_var=*/nullptr, + clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.procBindKindAttr, clauseOps.privateVars, + clauseOps.privatizers.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.privatizers), + isComposite ? firOpBuilder.getUnitAttr() : nullptr); + + if (clauseOps.numThreadsVar) { + if (evalNumThreadsOutsideTarget) + targetOp.getNumThreadsMutable().assign(clauseOps.numThreadsVar); + else + parallelOp.getNumThreadsVarMutable().assign(clauseOps.numThreadsVar); } - valuesDefinedAbove.clear(); - mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); + + return parallelOp; } - // Insert dummy instruction to remember the insertion position. The - // marker will be deleted since there are not uses. - // In the HLFIR flow there are hlfir.declares inserted above while - // setting block arguments. - mlir::Value undefMarker = firOpBuilder.create( - targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); + // TODO Integrate delayed privatization better with the new approach. + // - Store delayedPrivatizationInfo.{originalAddresses,privatizers} in + // clauseOps.{privateVars,privatizers}. + // - Outline genRegionEntryCB into composable genPrivatizedVars. + // - Refactor to create the omp.parallel op in a single place and possibly + // only use a single callback. + // - Check whether the external DataSharingProcessor could be used, and skip + // the call to processStep1() here. Perhaps also skip setting it in the + // OpWithBodyGenInfo structure. - // Create blocks for unstructured regions. 
This has to be done since - // blocks are initially allocated with the function as the parent region. - if (eval.lowerAsUnstructured()) { - Fortran::lower::createEmptyRegionBlocks( - firOpBuilder, eval.getNestedEvaluations()); + bool privatize = !outerCombined; + DataSharingProcessor dsp(converter, clauseList, eval, + /*useDelayedPrivatization=*/true, &symTable); + + if (privatize) + dsp.processStep1(); + + const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo(); + + auto genRegionEntryCB = [&](mlir::Operation *op) { + auto parallelOp = llvm::cast(op); + + llvm::SmallVector reductionLocs( + clauseOps.reductionVars.size(), currentLocation); + + mlir::OperandRange privateVars = parallelOp.getPrivateVars(); + mlir::Region ®ion = parallelOp.getRegion(); + + llvm::SmallVector privateVarTypes = clauseOps.reductionTypes; + privateVarTypes.reserve(privateVarTypes.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(privateVarTypes), + [](mlir::Value v) { return v.getType(); }); + + llvm::SmallVector privateVarLocs = reductionLocs; + privateVarLocs.reserve(privateVarLocs.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(privateVarLocs), + [](mlir::Value v) { return v.getLoc(); }); + + converter.getFirOpBuilder().createBlock(®ion, /*insertPt=*/{}, + privateVarTypes, privateVarLocs); + + llvm::SmallVector allSymbols = + *clauseOps.reductionSymbols; + allSymbols.append(delayedPrivatizationInfo.symbols); + for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { + converter.bindSymbol(*arg, prv); + } + + return allSymbols; + }; + + // TODO Merge with the reduction CB. + genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); + + llvm::SmallVector privatizers( + delayedPrivatizationInfo.privatizers.begin(), + delayedPrivatizationInfo.privatizers.end()); + + auto parallelOp = genOpWithBody( + genInfo, clauseOps.ifVar, /*num_threads_var=*/nullptr, + clauseOps.allocateVars, clauseOps.allocatorVars, clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.procBindKindAttr, delayedPrivatizationInfo.originalAddresses, + delayedPrivatizationInfo.privatizers.empty() + ? nullptr + : firOpBuilder.getArrayAttr(privatizers), + isComposite ? firOpBuilder.getUnitAttr() : nullptr); + + if (clauseOps.numThreadsVar) { + if (evalNumThreadsOutsideTarget) + targetOp.getNumThreadsMutable().assign(clauseOps.numThreadsVar); + else + parallelOp.getNumThreadsVarMutable().assign(clauseOps.numThreadsVar); } - firOpBuilder.create(currentLocation); + return parallelOp; +} - // Create the insertion point after the marker. - firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); - if (genNested) - genNestedEvaluations(converter, eval); +static mlir::omp::SectionOp +genSectionOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + // Currently only private/firstprivate clause is handled, and + // all privatization is done within `omp.section` operations. 
+ return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setClauses(&clauseList)); +} + +static mlir::omp::SectionsOp +genSectionsOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation, + const SectionsOpClauseOps &clauseOps) { + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(false), + /*reduction_vars=*/mlir::ValueRange(), /*reductions=*/nullptr, + clauseOps.allocateVars, clauseOps.allocatorVars, clauseOps.nowaitAttr); +} + +static mlir::omp::SimdLoopOp +genSimdLoopOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + SimdLoopOpClauseOps clauseOps; + genSimdLoopClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + clauseOps); + + auto *nestedEval = + getCollapsedLoopEval(eval, Fortran::lower::getCollapseValue(clauseList)); + + // TODO Create callback to add reduction vars as entry block arguments. + + // TODO Store clauseOps.reductionVars, clauseOps.reductionDeclSymbols in op. + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setGenNested(false), + /*result_types=*/mlir::TypeRange(), clauseOps.alignedVars, + clauseOps.alignmentAttrs.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.alignmentAttrs), + clauseOps.ifVar, clauseOps.nontemporalVars, clauseOps.orderAttr, + clauseOps.simdlenAttr, clauseOps.safelenAttr, + isComposite ? firOpBuilder.getUnitAttr() : nullptr); +} + +static mlir::omp::SingleOp +genSingleOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList &endClauseList) { + SingleOpClauseOps clauseOps; + genSingleClauses(converter, semaCtx, beginClauseList, endClauseList, + currentLocation, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setClauses(&beginClauseList), + clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.copyprivateVars, + clauseOps.copyprivateFuncs.empty() + ? 
nullptr + : converter.getFirOpBuilder().getArrayAttr( + clauseOps.copyprivateFuncs), + clauseOps.nowaitAttr); } static mlir::omp::TargetOp @@ -1243,50 +1719,29 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList, - llvm::omp::Directive directive, bool outerCombined = false) { + bool outerCombined = false) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, deviceOperand, threadLimitOperand; - mlir::UnitAttr nowaitAttr; - llvm::SmallVector dependTypeOperands; - llvm::SmallVector mapOperands, dependOperands; - llvm::SmallVector mapSymTypes; - llvm::SmallVector mapSymLocs; - llvm::SmallVector mapSymbols; - llvm::SmallVector reductionSymbols; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Target, - ifClauseOperand); - cp.processDevice(stmtCtx, deviceOperand); - cp.processThreadLimit(stmtCtx, threadLimitOperand); - cp.processDepend(dependTypeOperands, dependOperands); - cp.processMap(currentLocation, directive, stmtCtx, mapOperands, &mapSymTypes, - &mapSymLocs, &mapSymbols); - - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_target); - // Process host-only clauses. - if (!llvm::cast(*converter.getModuleOp()) - .getIsTargetDevice()) - cp.processNowait(nowaitAttr); + bool processHostOnlyClauses = + !llvm::cast(*converter.getModuleOp()) + .getIsTargetDevice(); - if (outerCombined) - cp.processTargetReduction(reductionSymbols); + TargetOpClauseOps clauseOps; + clauseOps.mapSymbols.emplace(); + clauseOps.mapSymLocs.emplace(); + clauseOps.mapSymTypes.emplace(); + genTargetClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + processHostOnlyClauses, /*processReduction=*/outerCombined, + clauseOps); // 5.8.1 Implicit Data-Mapping Attribute Rules // The following code follows the implicit data-mapping rules to map all the // symbols used inside the region that have not been explicitly mapped using // the map clause. 
auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) { - if (llvm::find(mapSymbols, &sym) == mapSymbols.end()) { + if (llvm::find(*clauseOps.mapSymbols, &sym) == + clauseOps.mapSymbols->end()) { mlir::Value baseOp = converter.getSymbolAddress(sym); if (!baseOp) if (const auto *details = sym.template detailsIf< @@ -1301,22 +1756,21 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym); name << sym.name().ToString(); - Fortran::lower::AddrAndBoundsInfo info = - getDataOperandBaseAddr(converter, converter.getFirOpBuilder(), sym, - converter.getCurrentLocation()); + Fortran::lower::AddrAndBoundsInfo info = getDataOperandBaseAddr( + converter, firOpBuilder, sym, converter.getCurrentLocation()); if (fir::unwrapRefType(info.addr.getType()).isa()) bounds = Fortran::lower::genBoundsOpsFromBox( - converter.getFirOpBuilder(), converter.getCurrentLocation(), - converter, dataExv, info); + firOpBuilder, converter.getCurrentLocation(), converter, + dataExv, info); if (fir::unwrapRefType(info.addr.getType()).isa()) { bool dataExvIsAssumedSize = Fortran::semantics::IsAssumedSizeArray(sym.GetUltimate()); bounds = Fortran::lower::genBaseBoundsOps( - converter.getFirOpBuilder(), converter.getCurrentLocation(), - converter, dataExv, dataExvIsAssumedSize); + firOpBuilder, converter.getCurrentLocation(), converter, dataExv, + dataExvIsAssumedSize); } llvm::omp::OpenMPOffloadMappingFlags mapFlag = @@ -1342,8 +1796,8 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; } - } else if (llvm::find(reductionSymbols, &sym) != - reductionSymbols.end()) { + } else if (llvm::find(clauseOps.targetReductionSymbols, &sym) != + clauseOps.targetReductionSymbols.end()) { // Do a tofrom map for reduction variables. mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; @@ -1355,39 +1809,160 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, } mlir::Value mapOp = createMapInfoOp( - converter.getFirOpBuilder(), baseOp.getLoc(), baseOp, mlir::Value{}, - name.str(), bounds, {}, + firOpBuilder, baseOp.getLoc(), baseOp, mlir::Value{}, name.str(), + bounds, {}, static_cast< std::underlying_type_t>( mapFlag), captureKind, baseOp.getType()); - mapOperands.push_back(mapOp); - mapSymTypes.push_back(baseOp.getType()); - mapSymLocs.push_back(baseOp.getLoc()); - mapSymbols.push_back(&sym); + clauseOps.mapVars.push_back(mapOp); + clauseOps.mapSymTypes->push_back(baseOp.getType()); + clauseOps.mapSymLocs->push_back(baseOp.getLoc()); + clauseOps.mapSymbols->push_back(&sym); } } }; Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap); - auto targetOp = converter.getFirOpBuilder().create( - currentLocation, ifClauseOperand, deviceOperand, threadLimitOperand, + auto targetOp = firOpBuilder.create( + currentLocation, clauseOps.ifVar, clauseOps.deviceVar, + clauseOps.threadLimitVar, /*trip_count=*/nullptr, - dependTypeOperands.empty() + clauseOps.dependTypeAttrs.empty() ? 
nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - dependTypeOperands), - dependOperands, nowaitAttr, mapOperands, + : firOpBuilder.getArrayAttr(clauseOps.dependTypeAttrs), + clauseOps.dependVars, clauseOps.nowaitAttr, clauseOps.mapVars, /*num_teams_lower=*/nullptr, /*num_teams_upper=*/nullptr, /*teams_thread_limit=*/nullptr, /*num_threads=*/nullptr); - genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSymTypes, - mapSymLocs, mapSymbols, currentLocation); + genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, + *clauseOps.mapSymTypes, *clauseOps.mapSymLocs, + *clauseOps.mapSymbols, currentLocation); return targetOp; } +static mlir::omp::TaskGroupOp +genTaskGroupOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + TaskGroupOpClauseOps clauseOps; + genTaskGroupClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + // TODO Possibly create callback to add task reduction vars as entry block + // arguments. + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setClauses(&clauseList), + clauseOps.taskReductionVars, + clauseOps.taskReductionDeclSymbols.empty() + ? nullptr + : converter.getFirOpBuilder().getArrayAttr( + clauseOps.taskReductionDeclSymbols), + clauseOps.allocateVars, clauseOps.allocatorVars); +} + +static mlir::omp::TaskLoopOp +genTaskLoopOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + TaskLoopOpClauseOps clauseOps; + clauseOps.reductionSymbols.emplace(); + genTaskLoopClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + clauseOps); + + auto *nestedEval = + getCollapsedLoopEval(eval, Fortran::lower::getCollapseValue(clauseList)); + + auto reductionCallback = [&](mlir::Operation *op) { + // TODO Possibly add in-reductions to the entry block argument list. + genReductionVars(op, converter, currentLocation, + *clauseOps.reductionSymbols, clauseOps.reductionTypes); + return *clauseOps.reductionSymbols; + }; + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setGenRegionEntryCb(reductionCallback) + .setReductions(*clauseOps.reductionSymbols, clauseOps.reductionTypes) + .setGenNested(false), + clauseOps.ifVar, clauseOps.finalVar, clauseOps.untiedAttr, + clauseOps.mergeableAttr, clauseOps.inReductionVars, + clauseOps.inReductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.inReductionDeclSymbols), + clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.priorityVar, clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.grainsizeVar, clauseOps.numTasksVar, clauseOps.nogroupAttr, + isComposite ? 
firOpBuilder.getUnitAttr() : nullptr);
+}
+
+static mlir::omp::TaskOp
+genTaskOp(Fortran::lower::AbstractConverter &converter,
+          Fortran::semantics::SemanticsContext &semaCtx,
+          Fortran::lower::pft::Evaluation &eval, bool genNested,
+          mlir::Location currentLocation,
+          const Fortran::parser::OmpClauseList &clauseList) {
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  Fortran::lower::StatementContext stmtCtx;
+  TaskOpClauseOps clauseOps;
+  genTaskClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation,
+                 clauseOps);
+
+  // TODO Possibly create callback to add in-reductions as entry block
+  // arguments.
+
+  return genOpWithBody<mlir::omp::TaskOp>(
+      OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
+          .setGenNested(genNested)
+          .setClauses(&clauseList),
+      clauseOps.ifVar, clauseOps.finalVar, clauseOps.untiedAttr,
+      clauseOps.mergeableAttr, clauseOps.inReductionVars,
+      clauseOps.inReductionDeclSymbols.empty()
+          ? nullptr
+          : firOpBuilder.getArrayAttr(clauseOps.inReductionDeclSymbols),
+      clauseOps.priorityVar,
+      clauseOps.dependTypeAttrs.empty()
+          ? nullptr
+          : firOpBuilder.getArrayAttr(clauseOps.dependTypeAttrs),
+      clauseOps.dependVars, clauseOps.allocateVars, clauseOps.allocatorVars);
+}
+
+static mlir::omp::TaskWaitOp
+genTaskWaitOp(Fortran::lower::AbstractConverter &converter,
+              Fortran::semantics::SemanticsContext &semaCtx,
+              Fortran::lower::pft::Evaluation &eval,
+              mlir::Location currentLocation,
+              const Fortran::parser::OmpClauseList &clauseList) {
+  TaskWaitOpClauseOps clauseOps;
+  genTaskWaitClauses(converter, semaCtx, clauseList, currentLocation,
+                     clauseOps);
+  return converter.getFirOpBuilder().create<mlir::omp::TaskWaitOp>(
+      currentLocation);
+}
+
+static mlir::omp::TaskYieldOp
+genTaskYieldOp(Fortran::lower::AbstractConverter &converter,
+               Fortran::semantics::SemanticsContext &semaCtx,
+               Fortran::lower::pft::Evaluation &eval,
+               mlir::Location currentLocation) {
+  return converter.getFirOpBuilder().create<mlir::omp::TaskYieldOp>(
+      currentLocation);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(Fortran::lower::AbstractConverter &converter,
            Fortran::semantics::SemanticsContext &semaCtx,
@@ -1395,192 +1970,233 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter,
            mlir::Location currentLocation,
            const Fortran::parser::OmpClauseList &clauseList,
            bool outerCombined = false) {
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   Fortran::lower::StatementContext stmtCtx;
-  mlir::Value numTeamsClauseOperand, ifClauseOperand, threadLimitClauseOperand;
-  llvm::SmallVector<mlir::Value> allocateOperands, allocatorOperands,
-      reductionVars;
-  llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
-
-  ClauseProcessor cp(converter, semaCtx, clauseList);
-  cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Teams,
-               ifClauseOperand);
-  cp.processAllocate(allocatorOperands, allocateOperands);
-  cp.processDefault();
-  // cp.processTODO(
-  //     currentLocation, llvm::omp::Directive::OMPD_teams);
+  TeamsOpClauseOps clauseOps;
 
-  // Evaluate NUM_TEAMS and THREAD_LIMIT on the host device, if currently inside
-  // of an omp.target operation.
auto offloadModOp = llvm::cast( converter.getModuleOp().getOperation()); - mlir::omp::TargetOp targetOp = - findParentTargetOp(converter.getFirOpBuilder()); + mlir::omp::TargetOp targetOp = findParentTargetOp(firOpBuilder); - bool mustEvalOutsideTarget = targetOp && !offloadModOp.getIsTargetDevice(); - if (mustEvalOutsideTarget) { - HostClausesInsertionGuard guard(converter.getFirOpBuilder()); - cp.processNumTeams(stmtCtx, numTeamsClauseOperand); - cp.processThreadLimit(stmtCtx, threadLimitClauseOperand); - } else { - cp.processNumTeams(stmtCtx, numTeamsClauseOperand); - cp.processThreadLimit(stmtCtx, threadLimitClauseOperand); - } + bool evalNumTeamsOutsideTarget = + targetOp && !offloadModOp.getIsTargetDevice(); + + genTeamsClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + evalNumTeamsOutsideTarget, clauseOps); + + // TODO Possibly create callback to add reductions as entry block arguments. auto teamsOp = genOpWithBody( OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) .setGenNested(genNested) .setOuterCombined(outerCombined) .setClauses(&clauseList), - /*num_teams_lower=*/nullptr, /*num_teams_upper=*/nullptr, ifClauseOperand, - /*thread_limit=*/nullptr, allocateOperands, allocatorOperands, - reductionVars, - reductionDeclSymbols.empty() + /*num_teams_lower=*/nullptr, /*num_teams_upper=*/nullptr, clauseOps.ifVar, + /*thread_limit=*/nullptr, clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - reductionDeclSymbols)); - if (mustEvalOutsideTarget) { - if (numTeamsClauseOperand) - targetOp.getNumTeamsUpperMutable().assign(numTeamsClauseOperand); - if (threadLimitClauseOperand) - targetOp.getTeamsThreadLimitMutable().assign(threadLimitClauseOperand); + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols)); + + // TODO Populate lower bound once supported by the clause processor + if (evalNumTeamsOutsideTarget) { + if (clauseOps.numTeamsUpperVar) + targetOp.getNumTeamsUpperMutable().assign(clauseOps.numTeamsUpperVar); + if (clauseOps.threadLimitVar) + targetOp.getTeamsThreadLimitMutable().assign(clauseOps.threadLimitVar); } else { - if (numTeamsClauseOperand) - teamsOp.getNumTeamsUpperMutable().assign(numTeamsClauseOperand); - if (threadLimitClauseOperand) - teamsOp.getThreadLimitMutable().assign(threadLimitClauseOperand); + if (clauseOps.numTeamsUpperVar) + teamsOp.getNumTeamsUpperMutable().assign(clauseOps.numTeamsUpperVar); + if (clauseOps.threadLimitVar) + teamsOp.getThreadLimitMutable().assign(clauseOps.threadLimitVar); } return teamsOp; } -static mlir::omp::DistributeOp -genDistributeOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList, - bool outerCombined = false) { - // TODO Process clauses - // ClauseProcessor cp(converter, clauseList); - // cp.processAllocate(allocatorOperands, allocateOperands); - // ... 
- - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setOuterCombined(outerCombined) - .setClauses(&clauseList), - /*dist_schedule_static=*/nullptr, - /*chunk_size=*/nullptr, - /*allocate_vars=*/mlir::ValueRange(), - /*allocators_vars=*/mlir::ValueRange(), - /*order_val=*/nullptr); -} - -/// Extract the list of function and variable symbols affected by the given -/// 'declare target' directive and return the intended device type for them. -static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, - llvm::SmallVectorImpl &symbolAndClause) { - - // The default capture type - mlir::omp::DeclareTargetDeviceType deviceType = - mlir::omp::DeclareTargetDeviceType::any; - const auto &spec = std::get( - declareTargetConstruct.t); +static mlir::omp::WsLoopOp +genWsLoopOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + WsloopOpClauseOps clauseOps; + clauseOps.reductionSymbols.emplace(); + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, clauseOps); - if (const auto *objectList{ - Fortran::parser::Unwrap(spec.u)}) { - // Case: declare target(func, var1, var2) - gatherFuncAndVarSyms(*objectList, mlir::omp::DeclareTargetCaptureClause::to, - symbolAndClause); - } else if (const auto *clauseList{ - Fortran::parser::Unwrap( - spec.u)}) { - if (clauseList->v.empty()) { - // Case: declare target, implicit capture of function - symbolAndClause.emplace_back( - mlir::omp::DeclareTargetCaptureClause::to, - eval.getOwningProcedure()->getSubprogramSymbol()); - } + auto *nestedEval = getCollapsedLoopEval( + eval, Fortran::lower::getCollapseValue(beginClauseList)); - ClauseProcessor cp(converter, semaCtx, *clauseList); - cp.processTo(symbolAndClause); - cp.processEnter(symbolAndClause); - cp.processLink(symbolAndClause); - cp.processDeviceType(deviceType); - cp.processTODO( - converter.getCurrentLocation(), - llvm::omp::Directive::OMPD_declare_target); - } + auto reductionCallback = [&](mlir::Operation *op) { + genReductionVars(op, converter, currentLocation, + *clauseOps.reductionSymbols, clauseOps.reductionTypes); + return *clauseOps.reductionSymbols; + }; - return deviceType; + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setReductions(*clauseOps.reductionSymbols, clauseOps.reductionTypes) + .setGenRegionEntryCb(reductionCallback) + .setGenNested(false), + clauseOps.linearVars, clauseOps.linearStepVars, clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.scheduleValAttr, clauseOps.scheduleChunkVar, + clauseOps.scheduleModAttr, clauseOps.scheduleSimdAttr, + clauseOps.nowaitAttr, clauseOps.orderedAttr, clauseOps.orderAttr, + isComposite ? 
firOpBuilder.getUnitAttr() : nullptr); } -static void collectDeferredDeclareTargets( +//===----------------------------------------------------------------------===// +// Code generation functions for composite constructs +//===----------------------------------------------------------------------===// + +static void genCompositeDistributeParallelDo( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, - llvm::SmallVectorImpl - &deferredDeclareTarget) { - llvm::SmallVector symbolAndClause; - mlir::omp::DeclareTargetDeviceType devType = getDeclareTargetInfo( - converter, semaCtx, eval, declareTargetConstruct, symbolAndClause); - // Return the device type only if at least one of the targets for the - // directive is a function or subroutine - mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); - - for (const DeclareTargetCapturePair &symClause : symbolAndClause) { - mlir::Operation *op = mod.lookupSymbol(converter.mangleName( - std::get(symClause))); - - if (!op) { - deferredDeclareTarget.push_back( - {std::get<0>(symClause), devType, std::get<1>(symClause)}); - } - } -} - -static std::optional -getDeclareTargetFunctionDevice( + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + DistributeOpClauseOps distributeClauseOps; + ParallelOpClauseOps parallelClauseOps; + WsloopOpClauseOps wsLoopClauseOps; + + genDistributeClauses(converter, semaCtx, beginClauseList, currentLocation, + distributeClauseOps); + // TODO evalNumThreadsOutsideTarget + genParallelClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, /*processReduction=*/true, + /*evalNumThreadsOutsideTarget=*/true, parallelClauseOps); + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, wsLoopClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genDistributeOp(); + // genParallelOp(); + // genWsLoopOp(); + TODO(currentLocation, "Composite DISTRIBUTE PARALLEL DO not implemented"); +} + +static void genCompositeDistributeParallelDoSimd( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPDeclareTargetConstruct - &declareTargetConstruct) { - llvm::SmallVector symbolAndClause; - mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo( - converter, semaCtx, eval, declareTargetConstruct, symbolAndClause); + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + DistributeOpClauseOps distributeClauseOps; + ParallelOpClauseOps parallelClauseOps; + WsloopOpClauseOps wsLoopClauseOps; + SimdLoopOpClauseOps simdClauseOps; + + genDistributeClauses(converter, semaCtx, beginClauseList, currentLocation, + distributeClauseOps); + // TODO evalNumThreadsOutsideTarget + genParallelClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, /*processReduction=*/true, + /*evalNumThreadsOutsideTarget=*/true, parallelClauseOps); + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, wsLoopClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, 
beginClauseList, + currentLocation, simdClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genDistributeOp(); + // genParallelOp(); + // genWsloopOp(); + // genSimdLoopOp(); + TODO(currentLocation, + "Composite DISTRIBUTE PARALLEL DO SIMD not implemented"); +} + +static void genCompositeDistributeSimd( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + DistributeOpClauseOps distributeClauseOps; + SimdLoopOpClauseOps simdClauseOps; - // Return the device type only if at least one of the targets for the - // directive is a function or subroutine - mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); - for (const DeclareTargetCapturePair &symClause : symbolAndClause) { - mlir::Operation *op = mod.lookupSymbol(converter.mangleName( - std::get(symClause))); + genDistributeClauses(converter, semaCtx, beginClauseList, currentLocation, + distributeClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, simdClauseOps); - if (mlir::isa_and_nonnull(op)) - return deviceType; - } + // TODO Pass clauseOps structures to generate wrappers + // genDistributeOp(); + // genSimdLoopOp(); + TODO(currentLocation, "Composite DISTRIBUTE SIMD not implemented"); +} - return std::nullopt; +static void +genCompositeDoSimd(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + WsloopOpClauseOps wsLoopClauseOps; + SimdLoopOpClauseOps simdClauseOps; + + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, wsLoopClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, simdClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genWsloopOp(); + // genSimdLoopOp(); + TODO(currentLocation, "Composite DO SIMD not implemented"); } static void -genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - const Fortran::parser::OpenMPSimpleStandaloneConstruct - &simpleStandaloneConstruct) { +genCompositeTaskLoopSimd(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + TaskLoopOpClauseOps taskLoopClauseOps; + SimdLoopOpClauseOps simdClauseOps; + + genTaskLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, taskLoopClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, simdClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genTaskloopOp(); + // genSimdLoopOp(); + TODO(currentLocation, "Composite TASKLOOP SIMD not implemented"); +} + +//===----------------------------------------------------------------------===// +// genOMP() Code 
generation functions +//===----------------------------------------------------------------------===// + +static void genOMP(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPSimpleStandaloneConstruct + &simpleStandaloneConstruct) { const auto &directive = std::get( simpleStandaloneConstruct.t); - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - const auto &opClauseList = + const auto &clauseList = std::get(simpleStandaloneConstruct.t); mlir::Location currentLocation = converter.genLocation(directive.source); @@ -1588,33 +2204,29 @@ genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter, default: break; case llvm::omp::Directive::OMPD_barrier: - firOpBuilder.create(currentLocation); + genBarrierOp(converter, semaCtx, eval, currentLocation); break; case llvm::omp::Directive::OMPD_taskwait: - ClauseProcessor(converter, semaCtx, opClauseList) - .processTODO( - currentLocation, llvm::omp::Directive::OMPD_taskwait); - firOpBuilder.create(currentLocation); + genTaskWaitOp(converter, semaCtx, eval, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_taskyield: - firOpBuilder.create(currentLocation); + genTaskYieldOp(converter, semaCtx, eval, currentLocation); break; case llvm::omp::Directive::OMPD_target_data: - genDataOp(converter, semaCtx, eval, genNested, currentLocation, - opClauseList); + genDataOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, + clauseList); break; case llvm::omp::Directive::OMPD_target_enter_data: genEnterExitUpdateDataOp( - converter, semaCtx, currentLocation, opClauseList); + converter, semaCtx, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_target_exit_data: genEnterExitUpdateDataOp( - converter, semaCtx, currentLocation, opClauseList); + converter, semaCtx, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_target_update: genEnterExitUpdateDataOp( - converter, semaCtx, currentLocation, opClauseList); + converter, semaCtx, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_ordered: TODO(currentLocation, "OMPD_ordered"); @@ -1622,28 +2234,21 @@ genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter, } static void -genOmpFlush(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { - llvm::SmallVector operandRange; - if (const auto &ompObjectList = - std::get>( - flushConstruct.t)) - genObjectList(*ompObjectList, converter, operandRange); - const auto &memOrderClause = +genOMP(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { + const auto &verbatim = std::get(flushConstruct.t); + const auto &objectList = + std::get>(flushConstruct.t); + const auto &clauseList = std::get>>( flushConstruct.t); - if (memOrderClause && memOrderClause->size() > 0) - TODO(converter.getCurrentLocation(), "Handle OmpMemoryOrderClause"); - converter.getFirOpBuilder().create( - converter.getCurrentLocation(), operandRange); + mlir::Location currentLocation = converter.genLocation(verbatim.source); + genFlushOp(converter, semaCtx, eval, currentLocation, objectList, clauseList); } 
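The genFlushOp helper called just above is presumably defined in an earlier hunk of this patch and is not visible here. As a rough, non-authoritative sketch (signature, parameter names, and return type are assumptions, modeled on the genOmpFlush body removed above), it would take roughly this shape:

static mlir::omp::FlushOp
genFlushOp(Fortran::lower::AbstractConverter &converter,
           Fortran::semantics::SemanticsContext &semaCtx,
           Fortran::lower::pft::Evaluation &eval,
           mlir::Location currentLocation,
           const std::optional<Fortran::parser::OmpObjectList> &objectList,
           const std::optional<std::list<Fortran::parser::OmpMemoryOrderClause>>
               &clauseList) {
  // Lower the flush-set (if any) to SSA values, as the removed genOmpFlush did.
  llvm::SmallVector<mlir::Value> operandRange;
  if (objectList)
    genObjectList(*objectList, converter, operandRange);
  // Memory-order clauses on FLUSH remain unsupported at this point.
  if (clauseList && clauseList->size() > 0)
    TODO(currentLocation, "Handle OmpMemoryOrderClause");
  return converter.getFirOpBuilder().create<mlir::omp::FlushOp>(
      currentLocation, operandRange);
}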
-//===----------------------------------------------------------------------===// -// genOMP() Code generation helper functions -//===----------------------------------------------------------------------===// - static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, @@ -1654,12 +2259,11 @@ genOMP(Fortran::lower::AbstractConverter &converter, Fortran::common::visitors{ [&](const Fortran::parser::OpenMPSimpleStandaloneConstruct &simpleStandaloneConstruct) { - genOmpSimpleStandalone(converter, semaCtx, eval, - /*genNested=*/true, - simpleStandaloneConstruct); + genOMP(converter, symTable, semaCtx, eval, + simpleStandaloneConstruct); }, [&](const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { - genOmpFlush(converter, semaCtx, eval, flushConstruct); + genOMP(converter, symTable, semaCtx, eval, flushConstruct); }, [&](const Fortran::parser::OpenMPCancelConstruct &cancelConstruct) { TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct"); @@ -1672,300 +2276,6 @@ genOMP(Fortran::lower::AbstractConverter &converter, standaloneConstruct.u); } -static void convertLoopBounds(Fortran::lower::AbstractConverter &converter, - mlir::Location loc, - llvm::SmallVectorImpl &lowerBound, - llvm::SmallVectorImpl &upperBound, - llvm::SmallVectorImpl &step, - std::size_t loopVarTypeSize) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - // The types of lower bound, upper bound, and step are converted into the - // type of the loop variable if necessary. - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - for (unsigned it = 0; it < (unsigned)lowerBound.size(); it++) { - lowerBound[it] = - firOpBuilder.createConvert(loc, loopVarType, lowerBound[it]); - upperBound[it] = - firOpBuilder.createConvert(loc, loopVarType, upperBound[it]); - step[it] = firOpBuilder.createConvert(loc, loopVarType, step[it]); - } -} - -static llvm::SmallVector -genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, - mlir::Location &loc, - const llvm::SmallVector &args) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - auto ®ion = op->getRegion(0); - - std::size_t loopVarTypeSize = 0; - for (const Fortran::semantics::Symbol *arg : args) - loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - llvm::SmallVector tiv(args.size(), loopVarType); - llvm::SmallVector locs(args.size(), loc); - firOpBuilder.createBlock(®ion, {}, tiv, locs); - // The argument is not currently in memory, so make a temporary for the - // argument, and store it there, then bind that location to the argument. 
- mlir::Operation *storeOp = nullptr; - for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { - mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex)); - storeOp = - createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); - } - firOpBuilder.setInsertionPointAfter(storeOp); - - return args; -} - -static llvm::SmallVector -genLoopAndReductionVars( - mlir::Operation *op, Fortran::lower::AbstractConverter &converter, - mlir::Location &loc, - const llvm::SmallVector &loopArgs, - const llvm::SmallVector &reductionArgs, - llvm::SmallVector &reductionTypes) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - - llvm::SmallVector blockArgTypes; - llvm::SmallVector blockArgLocs; - blockArgTypes.reserve(loopArgs.size() + reductionArgs.size()); - blockArgLocs.reserve(blockArgTypes.size()); - mlir::Block *entryBlock; - - if (loopArgs.size()) { - std::size_t loopVarTypeSize = 0; - for (const Fortran::semantics::Symbol *arg : loopArgs) - loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - std::fill_n(std::back_inserter(blockArgTypes), loopArgs.size(), - loopVarType); - std::fill_n(std::back_inserter(blockArgLocs), loopArgs.size(), loc); - } - if (reductionArgs.size()) { - llvm::copy(reductionTypes, std::back_inserter(blockArgTypes)); - std::fill_n(std::back_inserter(blockArgLocs), reductionArgs.size(), loc); - } - entryBlock = firOpBuilder.createBlock(&op->getRegion(0), {}, blockArgTypes, - blockArgLocs); - // The argument is not currently in memory, so make a temporary for the - // argument, and store it there, then bind that location to the argument. - if (loopArgs.size()) { - mlir::Operation *storeOp = nullptr; - for (auto [argIndex, argSymbol] : llvm::enumerate(loopArgs)) { - mlir::Value indexVal = - fir::getBase(op->getRegion(0).front().getArgument(argIndex)); - storeOp = - createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); - } - firOpBuilder.setInsertionPointAfter(storeOp); - } - // Bind the reduction arguments to their block arguments - for (auto [arg, prv] : llvm::zip_equal( - reductionArgs, - llvm::drop_begin(entryBlock->getArguments(), loopArgs.size()))) { - converter.bindSymbol(*arg, prv); - } - - return loopArgs; -} - -static void -createSimdLoop(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - llvm::omp::Directive ompDirective, - const Fortran::parser::OmpClauseList &loopOpClauseList, - mlir::Location loc) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - DataSharingProcessor dsp(converter, loopOpClauseList, eval); - dsp.processStep1(); - - Fortran::lower::StatementContext stmtCtx; - mlir::Value scheduleChunkClauseOperand, ifClauseOperand; - llvm::SmallVector lowerBound, upperBound, step, reductionVars; - llvm::SmallVector alignedVars, nontemporalVars; - llvm::SmallVector iv; - llvm::SmallVector reductionDeclSymbols; - mlir::omp::ClauseOrderKindAttr orderClauseOperand; - mlir::IntegerAttr simdlenClauseOperand, safelenClauseOperand; - std::size_t loopVarTypeSize; - - ClauseProcessor cp(converter, semaCtx, loopOpClauseList); - cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv, - loopVarTypeSize); - cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand); - cp.processReduction(loc, reductionVars, reductionDeclSymbols); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Simd, - ifClauseOperand); 
- cp.processSimdlen(simdlenClauseOperand); - cp.processSafelen(safelenClauseOperand); - cp.processTODO(loc, ompDirective); - - convertLoopBounds(converter, loc, lowerBound, upperBound, step, - loopVarTypeSize); - - mlir::TypeRange resultType; - auto simdLoopOp = firOpBuilder.create( - loc, resultType, lowerBound, upperBound, step, alignedVars, - /*alignment_values=*/nullptr, ifClauseOperand, nontemporalVars, - orderClauseOperand, simdlenClauseOperand, safelenClauseOperand, - /*inclusive=*/firOpBuilder.getUnitAttr()); - - auto *nestedEval = getCollapsedLoopEval( - eval, Fortran::lower::getCollapseValue(loopOpClauseList)); - - auto ivCallback = [&](mlir::Operation *op) { - return genLoopVars(op, converter, loc, iv); - }; - - createBodyOfOp( - simdLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval) - .setClauses(&loopOpClauseList) - .setDataSharingProcessor(&dsp) - .setGenRegionEntryCb(ivCallback)); -} - -static void createWsLoop(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - llvm::omp::Directive ompDirective, - const Fortran::parser::OmpClauseList &beginClauseList, - const Fortran::parser::OmpClauseList *endClauseList, - mlir::Location loc) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - DataSharingProcessor dsp(converter, beginClauseList, eval); - dsp.processStep1(); - - Fortran::lower::StatementContext stmtCtx; - mlir::Value scheduleChunkClauseOperand; - llvm::SmallVector lowerBound, upperBound, step, reductionVars; - llvm::SmallVector linearVars, linearStepVars; - llvm::SmallVector iv; - llvm::SmallVector reductionDeclSymbols; - llvm::SmallVector reductionSymbols; - mlir::omp::ClauseOrderKindAttr orderClauseOperand; - mlir::omp::ClauseScheduleKindAttr scheduleValClauseOperand; - mlir::UnitAttr nowaitClauseOperand, scheduleSimdClauseOperand; - mlir::IntegerAttr orderedClauseOperand; - mlir::omp::ScheduleModifierAttr scheduleModClauseOperand; - std::size_t loopVarTypeSize; - - ClauseProcessor cp(converter, semaCtx, beginClauseList); - cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv, - loopVarTypeSize); - cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand); - cp.processReduction(loc, reductionVars, reductionDeclSymbols, - &reductionSymbols); - cp.processTODO(loc, ompDirective); - - convertLoopBounds(converter, loc, lowerBound, upperBound, step, - loopVarTypeSize); - - auto wsLoopOp = firOpBuilder.create( - loc, lowerBound, upperBound, step, linearVars, linearStepVars, - reductionVars, - reductionDeclSymbols.empty() - ? nullptr - : mlir::ArrayAttr::get(firOpBuilder.getContext(), - reductionDeclSymbols), - scheduleValClauseOperand, scheduleChunkClauseOperand, - /*schedule_modifiers=*/nullptr, - /*simd_modifier=*/nullptr, nowaitClauseOperand, orderedClauseOperand, - orderClauseOperand, - /*inclusive=*/firOpBuilder.getUnitAttr()); - - // Handle attribute based clauses. - if (cp.processOrdered(orderedClauseOperand)) - wsLoopOp.setOrderedValAttr(orderedClauseOperand); - - if (cp.processSchedule(scheduleValClauseOperand, scheduleModClauseOperand, - scheduleSimdClauseOperand)) { - wsLoopOp.setScheduleValAttr(scheduleValClauseOperand); - wsLoopOp.setScheduleModifierAttr(scheduleModClauseOperand); - wsLoopOp.setSimdModifierAttr(scheduleSimdClauseOperand); - } - // In FORTRAN `nowait` clause occur at the end of `omp do` directive. 
- // i.e - // !$omp do - // <...> - // !$omp end do nowait - if (endClauseList) { - if (ClauseProcessor(converter, semaCtx, *endClauseList) - .processNowait(nowaitClauseOperand)) - wsLoopOp.setNowaitAttr(nowaitClauseOperand); - } - - auto *nestedEval = getCollapsedLoopEval( - eval, Fortran::lower::getCollapseValue(beginClauseList)); - - llvm::SmallVector reductionTypes; - reductionTypes.reserve(reductionVars.size()); - llvm::transform(reductionVars, std::back_inserter(reductionTypes), - [](mlir::Value v) { return v.getType(); }); - - auto ivCallback = [&](mlir::Operation *op) { - return genLoopAndReductionVars(op, converter, loc, iv, reductionSymbols, - reductionTypes); - }; - - createBodyOfOp( - wsLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval) - .setClauses(&beginClauseList) - .setDataSharingProcessor(&dsp) - .setReductions(&reductionSymbols, &reductionTypes) - .setGenRegionEntryCb(ivCallback)); - - // Create trip_count if inside of omp.target and this is host compilation - auto offloadMod = llvm::dyn_cast( - firOpBuilder.getModule().getOperation()); - auto targetOp = wsLoopOp->getParentOfType(); - - if (offloadMod && targetOp && !offloadMod.getIsTargetDevice() && - targetOp.isTargetSPMDLoop()) { - // Lower loop bounds and step, and process collapsing again, putting lowered - // values outside of omp.target this time. This enables calculating and - // accessing the trip count in the host, which is needed when lowering to - // LLVM IR via the OMPIRBuilder. - HostClausesInsertionGuard guard(firOpBuilder); - llvm::SmallVector outsideLB, outsideUB, outsideStep; - llvm::SmallVector outsideIV; - cp.processCollapse(loc, eval, outsideLB, outsideUB, outsideStep, outsideIV, - loopVarTypeSize); - targetOp.getTripCountMutable().assign( - calculateTripCount(converter, loc, outsideLB, outsideUB, outsideStep)); - } -} - -static void createSimdWsLoop( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive ompDirective, - const Fortran::parser::OmpClauseList &beginClauseList, - const Fortran::parser::OmpClauseList *endClauseList, mlir::Location loc) { - ClauseProcessor cp(converter, semaCtx, beginClauseList); - cp.processTODO< - Fortran::parser::OmpClause::Aligned, Fortran::parser::OmpClause::Allocate, - Fortran::parser::OmpClause::Linear, Fortran::parser::OmpClause::Safelen, - Fortran::parser::OmpClause::Simdlen, Fortran::parser::OmpClause::Order>( - loc, ompDirective); - // TODO: Add support for vectorization - add vectorization hints inside loop - // body. - // OpenMP standard does not specify the length of vector instructions. - // Currently we safely assume that for !$omp do simd pragma the SIMD length - // is equal to 1 (i.e. we generate standard workshare loop). - // When support for vectorization is enabled, then we need to add handling of - // if clause. Currently if clause can be skipped because we always assume - // SIMD length = 1. 
- createWsLoop(converter, semaCtx, eval, ompDirective, beginClauseList, - endClauseList, loc); -} - static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, @@ -1973,13 +2283,16 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, const Fortran::parser::OpenMPLoopConstruct &loopConstruct) { const auto &beginLoopDirective = std::get(loopConstruct.t); - const auto &loopOpClauseList = + const auto &beginClauseList = std::get(beginLoopDirective.t); mlir::Location currentLocation = converter.genLocation(beginLoopDirective.source); - const auto ompDirective = + llvm::omp::Directive origDirective = std::get(beginLoopDirective.t).v; + assert(llvm::omp::loopConstructSet.test(origDirective) && + "Expected loop construct"); + const auto *endClauseList = [&]() { using RetTy = const Fortran::parser::OmpClauseList *; if (auto &endLoopDirective = @@ -1991,61 +2304,191 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, return RetTy(); }(); - bool validDirective = false; - if (llvm::omp::topTaskloopSet.test(ompDirective)) { - validDirective = true; - TODO(currentLocation, "Taskloop construct"); - } else { - // Create omp.{target, teams, distribute, parallel} nested operations - if ((llvm::omp::allTargetSet & llvm::omp::loopConstructSet) - .test(ompDirective)) { - validDirective = true; - genTargetOp(converter, semaCtx, eval, /*genNested=*/false, - currentLocation, loopOpClauseList, ompDirective, - /*outerCombined=*/true); + /// Utility to remove the first leaf construct from a combined loop construct. + /// Composite constructs are not handled, as they cannot be split in that way. + auto peelCombinedLoopDirective = + [](llvm::omp::Directive dir) -> llvm::omp::Directive { + using D = llvm::omp::Directive; + switch (dir) { + case D::OMPD_masked_taskloop: + case D::OMPD_master_taskloop: + return D::OMPD_taskloop; + case D::OMPD_masked_taskloop_simd: + case D::OMPD_master_taskloop_simd: + return D::OMPD_taskloop_simd; + case D::OMPD_parallel_do: + return D::OMPD_do; + case D::OMPD_parallel_do_simd: + return D::OMPD_do_simd; + case D::OMPD_parallel_masked_taskloop: + return D::OMPD_masked_taskloop; + case D::OMPD_parallel_master_taskloop: + return D::OMPD_master_taskloop; + case D::OMPD_parallel_masked_taskloop_simd: + return D::OMPD_masked_taskloop_simd; + case D::OMPD_parallel_master_taskloop_simd: + return D::OMPD_master_taskloop_simd; + case D::OMPD_target_parallel_do: + return D::OMPD_parallel_do; + case D::OMPD_target_parallel_do_simd: + return D::OMPD_parallel_do_simd; + case D::OMPD_target_simd: + return D::OMPD_simd; + case D::OMPD_target_teams_distribute: + return D::OMPD_teams_distribute; + case D::OMPD_target_teams_distribute_parallel_do: + return D::OMPD_teams_distribute_parallel_do; + case D::OMPD_target_teams_distribute_parallel_do_simd: + return D::OMPD_teams_distribute_parallel_do_simd; + case D::OMPD_target_teams_distribute_simd: + return D::OMPD_teams_distribute_simd; + case D::OMPD_teams_distribute: + return D::OMPD_distribute; + case D::OMPD_teams_distribute_parallel_do: + return D::OMPD_distribute_parallel_do; + case D::OMPD_teams_distribute_parallel_do_simd: + return D::OMPD_distribute_parallel_do_simd; + case D::OMPD_teams_distribute_simd: + return D::OMPD_distribute_simd; + case D::OMPD_parallel_loop: + case D::OMPD_teams_loop: + return D::OMPD_loop; + case D::OMPD_target_parallel_loop: + return D::OMPD_parallel_loop; + case D::OMPD_target_teams_loop: + return 
D::OMPD_teams_loop; + default: + llvm_unreachable("Unexpected non-combined loop construct"); } - if ((llvm::omp::allTeamsSet & llvm::omp::loopConstructSet) - .test(ompDirective)) { - validDirective = true; - genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, - loopOpClauseList, /*outerCombined=*/true); - } - if (llvm::omp::allDistributeSet.test(ompDirective)) { - validDirective = true; - bool outerCombined = llvm::omp::topDistributeSet.test(ompDirective); - genDistributeOp(converter, semaCtx, eval, /*genNested=*/false, - currentLocation, loopOpClauseList, outerCombined); - } - if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet) - .test(ompDirective)) { - validDirective = true; - genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, - currentLocation, loopOpClauseList, - /*outerCombined=*/true); + }; + + // Privatization and loop nest clause processing must be done before producing + // any wrappers and after combined constructs, so that any operations created + // are outside of the wrapper nest. + DataSharingProcessor dsp(converter, beginClauseList, eval); + LoopNestOpClauseOps clauseOps; + auto processLoopNestClauses = [&]() { + dsp.processStep1(); + genLoopNestClauses(converter, semaCtx, eval, beginClauseList, + currentLocation, clauseOps); + }; + + llvm::omp::Directive ompDirective = origDirective; + if (llvm::omp::topTargetSet.test(ompDirective)) { + // TODO Combined constructs: Call genClauses and pass them in. + genTargetOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, + beginClauseList, /*outerCombined=*/true); + ompDirective = peelCombinedLoopDirective(ompDirective); + } + + if (llvm::omp::topTeamsSet.test(ompDirective)) { + genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, + beginClauseList, /*outerCombined=*/true); + ompDirective = peelCombinedLoopDirective(ompDirective); + } + + if (llvm::omp::topParallelSet.test(ompDirective)) { + genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, + /*isComposite=*/false, currentLocation, beginClauseList, + /*outerCombined=*/true); + ompDirective = peelCombinedLoopDirective(ompDirective); + processLoopNestClauses(); + } else { + processLoopNestClauses(); + + if (llvm::omp::topDistributeSet.test(ompDirective)) { + switch (ompDirective) { + case llvm::omp::Directive::OMPD_distribute: + genDistributeOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList, + /*outerCombined=*/true); + break; + case llvm::omp::Directive::OMPD_distribute_parallel_do: + genCompositeDistributeParallelDo(converter, semaCtx, eval, + beginClauseList, endClauseList, + currentLocation); + break; + case llvm::omp::Directive::OMPD_distribute_parallel_do_simd: + genCompositeDistributeParallelDoSimd(converter, semaCtx, eval, + beginClauseList, endClauseList, + currentLocation); + break; + case llvm::omp::Directive::OMPD_distribute_simd: + genCompositeDistributeSimd(converter, semaCtx, eval, beginClauseList, + endClauseList, currentLocation); + break; + default: + llvm_unreachable("Unexpected DISTRIBUTE construct"); + } + } else if (llvm::omp::topTaskloopSet.test(ompDirective)) { + switch (ompDirective) { + case llvm::omp::Directive::OMPD_taskloop_simd: + genCompositeTaskLoopSimd(converter, semaCtx, eval, beginClauseList, + endClauseList, currentLocation); + break; + case llvm::omp::Directive::OMPD_taskloop: + genTaskLoopOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList); + break; + default: + 
llvm_unreachable("Unexpected TASKLOOP construct"); + } + } else if (ompDirective == llvm::omp::Directive::OMPD_simd) { + genSimdLoopOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList); + } else if (!llvm::omp::topDoSet.test(ompDirective)) { + TODO(currentLocation, + "Unhandled loop directive (" + + llvm::omp::getOpenMPDirectiveName(origDirective) + ")"); } } - if ((llvm::omp::allDoSet | llvm::omp::allSimdSet).test(ompDirective)) - validDirective = true; - if (!validDirective) { + if (llvm::omp::topDoSet.test(ompDirective)) { + switch (ompDirective) { + case llvm::omp::Directive::OMPD_do_simd: + genCompositeDoSimd(converter, semaCtx, eval, beginClauseList, + endClauseList, currentLocation); + break; + case llvm::omp::Directive::OMPD_do: + genWsLoopOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList, endClauseList); + break; + default: + llvm_unreachable("Unexpected DO construct"); + } + } else if (llvm::omp::allParallelSet.test(origDirective)) { TODO(currentLocation, "Unhandled loop directive (" + - llvm::omp::getOpenMPDirectiveName(ompDirective) + + llvm::omp::getOpenMPDirectiveName(origDirective) + ")"); } - if (llvm::omp::allDoSimdSet.test(ompDirective)) { - // 2.9.3.2 Workshare SIMD construct - createSimdWsLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, - endClauseList, currentLocation); + // Create inner loop nest and body. + mlir::omp::LoopNestOp loopNestOp = + genLoopNestOp(converter, semaCtx, eval, currentLocation, beginClauseList, + clauseOps, dsp); - } else if (llvm::omp::allSimdSet.test(ompDirective)) { - // 2.9.3.1 SIMD construct - createSimdLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, - currentLocation); - genOpenMPReduction(converter, semaCtx, loopOpClauseList); - } else { - createWsLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, - endClauseList, currentLocation); + if (ompDirective == llvm::omp::Directive::OMPD_simd) + genOpenMPReduction(converter, semaCtx, beginClauseList); + + // Create trip_count outside of omp.target if this is host compilation and the + // loop is inside of a target region. + auto offloadMod = llvm::dyn_cast( + converter.getModuleOp().getOperation()); + auto targetOp = loopNestOp->getParentOfType(); + + if (offloadMod && targetOp && !offloadMod.getIsTargetDevice() && + targetOp.isTargetSPMDLoop()) { + // Lower loop bounds and step, and process collapsing again, putting lowered + // values outside of omp.target this time. This enables calculating and + // accessing the trip count in the host, which is needed when lowering to + // LLVM IR via the OMPIRBuilder. 
+ HostClausesInsertionGuard guard(converter.getFirOpBuilder()); + CollapseClauseOps collapseOps; + ClauseProcessor(converter, semaCtx, beginClauseList) + .processCollapse(currentLocation, eval, collapseOps); + targetOp.getTripCountMutable().assign( + calculateTripCount(converter, currentLocation, collapseOps.loopLBVar, + collapseOps.loopUBVar, collapseOps.loopStepVar)); } } @@ -2066,6 +2509,9 @@ genOMP(Fortran::lower::AbstractConverter &converter, const auto &endClauseList = std::get(endBlockDirective.t); + assert(llvm::omp::blockConstructSet.test(directive.v) && + "Expected block construct"); + for (const Fortran::parser::OmpClause &clause : beginClauseList.v) { mlir::Location clauseLocation = converter.genLocation(clause.source); if (!std::get_if(&clause.u) && @@ -2106,11 +2552,11 @@ genOMP(Fortran::lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_ordered: genOrderedRegionOp(converter, semaCtx, eval, /*genNested=*/true, - currentLocation); + currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_parallel: genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/true, - currentLocation, beginClauseList); + /*isComposite=*/false, currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_single: genSingleOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, @@ -2118,7 +2564,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_target: genTargetOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, - beginClauseList, directive.v); + beginClauseList); break; case llvm::omp::Directive::OMPD_target_data: genDataOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, @@ -2153,29 +2599,25 @@ genOMP(Fortran::lower::AbstractConverter &converter, // Codegen for combined directives bool combinedDirective = false; - if ((llvm::omp::allTargetSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::allTargetSet.test(directive.v)) { genTargetOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, - beginClauseList, directive.v, - /*outerCombined=*/true); + beginClauseList, /*outerCombined=*/true); combinedDirective = true; } - if ((llvm::omp::allTeamsSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::allTeamsSet.test(directive.v)) { genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, beginClauseList); combinedDirective = true; } - if ((llvm::omp::allParallelSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::allParallelSet.test(directive.v)) { bool outerCombined = directive.v != llvm::omp::Directive::OMPD_target_parallel; genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, - currentLocation, beginClauseList, outerCombined); + /*isComposite=*/false, currentLocation, beginClauseList, + outerCombined); combinedDirective = true; } - if ((llvm::omp::workShareSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::workShareSet.test(directive.v)) { genSingleOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, beginClauseList, endClauseList); combinedDirective = true; @@ -2194,38 +2636,13 @@ genOMP(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPCriticalConstruct &criticalConstruct) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::Location currentLocation = 
converter.getCurrentLocation(); - mlir::IntegerAttr hintClauseOp; - std::string name; - const Fortran::parser::OmpCriticalDirective &cd = + const auto &cd = std::get(criticalConstruct.t); - if (std::get>(cd.t).has_value()) { - name = - std::get>(cd.t).value().ToString(); - } - const auto &clauseList = std::get(cd.t); - ClauseProcessor(converter, semaCtx, clauseList).processHint(hintClauseOp); - - mlir::omp::CriticalOp criticalOp = [&]() { - if (name.empty()) { - return firOpBuilder.create( - currentLocation, mlir::FlatSymbolRefAttr()); - } - mlir::ModuleOp module = firOpBuilder.getModule(); - mlir::OpBuilder modBuilder(module.getBodyRegion()); - auto global = module.lookupSymbol(name); - if (!global) - global = modBuilder.create( - currentLocation, - mlir::StringAttr::get(firOpBuilder.getContext(), name), hintClauseOp); - return firOpBuilder.create( - currentLocation, mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(), - global.getSymName())); - }(); - auto genInfo = OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval); - createBodyOfOp(criticalOp, genInfo); + const auto &name = std::get>(cd.t); + mlir::Location currentLocation = converter.getCurrentLocation(); + genCriticalOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, + clauseList, name); } static void @@ -2234,55 +2651,48 @@ genOMP(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPSectionsConstruct §ionsConstruct) { - mlir::Location currentLocation = converter.getCurrentLocation(); - llvm::SmallVector allocateOperands, allocatorOperands; - mlir::UnitAttr nowaitClauseOperand; const auto &beginSectionsDirective = std::get(sectionsConstruct.t); - const auto §ionsClauseList = + const auto &beginClauseList = std::get(beginSectionsDirective.t); - - // Process clauses before optional omp.parallel, so that new variables are - // allocated outside of the parallel region - ClauseProcessor cp(converter, semaCtx, sectionsClauseList); - cp.processSectionsReduction(currentLocation); - cp.processAllocate(allocatorOperands, allocateOperands); - llvm::omp::Directive dir = std::get(beginSectionsDirective.t) .v; + const auto §ionBlocks = + std::get(sectionsConstruct.t); + + // Process clauses before optional omp.parallel, so that new variables are + // allocated outside of the parallel region. + mlir::Location currentLocation = converter.getCurrentLocation(); + SectionsOpClauseOps clauseOps; + genSectionsClauses(converter, semaCtx, beginClauseList, currentLocation, + /*clausesFromBeginSections=*/true, clauseOps); - // Parallel wrapper of PARALLEL SECTIONS construct + // Parallel wrapper of PARALLEL SECTIONS construct. 
if (dir == llvm::omp::Directive::OMPD_parallel_sections) { genParallelOp(converter, symTable, semaCtx, eval, - /*genNested=*/false, currentLocation, sectionsClauseList, - /*outerCombined=*/true); + /*genNested=*/false, /*isComposite=*/false, currentLocation, + beginClauseList, /*outerCombined=*/true); } else { const auto &endSectionsDirective = std::get(sectionsConstruct.t); - const auto &endSectionsClauseList = + const auto &endClauseList = std::get(endSectionsDirective.t); - ClauseProcessor(converter, semaCtx, endSectionsClauseList) - .processNowait(nowaitClauseOperand); + genSectionsClauses(converter, semaCtx, endClauseList, currentLocation, + /*clausesFromBeginSections=*/false, clauseOps); } - // SECTIONS construct - genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(false), - /*reduction_vars=*/mlir::ValueRange(), - /*reductions=*/nullptr, allocateOperands, allocatorOperands, - nowaitClauseOperand); + // SECTIONS construct. + genSectionsOp(converter, semaCtx, eval, currentLocation, clauseOps); - const auto §ionBlocks = - std::get(sectionsConstruct.t); + // Generate nested SECTION operations recursively. auto &firOpBuilder = converter.getFirOpBuilder(); auto ip = firOpBuilder.saveInsertionPoint(); for (const auto &[nblock, neval] : llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) { symTable.pushScope(); genSectionOp(converter, semaCtx, neval, /*genNested=*/true, currentLocation, - sectionsClauseList); + beginClauseList); symTable.popScope(); firOpBuilder.restoreInsertionPoint(ip); } @@ -2366,12 +2776,12 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) { - llvm::SmallVector symbolAndClause; mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + DeclareTargetOpClauseOps clauseOps; mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo( - converter, semaCtx, eval, declareTargetConstruct, symbolAndClause); + converter, semaCtx, eval, declareTargetConstruct, clauseOps); - for (const DeclareTargetCapturePair &symClause : symbolAndClause) { + for (const DeclareTargetCapturePair &symClause : clauseOps.symbolAndClause) { mlir::Operation *op = mod.lookupSymbol(converter.mangleName( std::get(symClause))); @@ -2482,11 +2892,10 @@ genOMP(Fortran::lower::AbstractConverter &converter, mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder, mlir::Operation *op, mlir::Location loc) { - if (mlir::isa(op)) + if (mlir::isa(op)) return builder.create(loc); - else - return builder.create(loc); + return builder.create(loc); } void Fortran::lower::genOpenMPConstruct( diff --git a/flang/lib/Lower/OpenMP/OperationClauses.h b/flang/lib/Lower/OpenMP/OperationClauses.h new file mode 100644 index 0000000000000..ed49c3f0750b1 --- /dev/null +++ b/flang/lib/Lower/OpenMP/OperationClauses.h @@ -0,0 +1,306 @@ +//===-- Lower/OpenMP/OperationClauses.h -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ +// +//===----------------------------------------------------------------------===// + +#include "Utils.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include + +namespace Fortran { +namespace semantics { +class Symbol; +} // namespace semantics +} // namespace Fortran + +namespace Fortran { +namespace lower { +namespace omp { + +//===----------------------------------------------------------------------===// +// Mixin structures defining operands associated with each OpenMP clause. +//===----------------------------------------------------------------------===// + +struct AlignedClauseOps { + llvm::SmallVector alignedVars; + llvm::SmallVector alignmentAttrs; +}; + +struct AllocateClauseOps { + llvm::SmallVector allocatorVars, allocateVars; +}; + +struct CollapseClauseOps { + llvm::SmallVector loopLBVar, loopUBVar, loopStepVar; + llvm::SmallVector loopIV; +}; + +struct CopyinClauseOps {}; + +struct CopyprivateClauseOps { + llvm::SmallVector copyprivateVars; + llvm::SmallVector copyprivateFuncs; +}; + +struct DependClauseOps { + llvm::SmallVector dependTypeAttrs; + llvm::SmallVector dependVars; +}; + +struct DeviceClauseOps { + mlir::Value deviceVar; +}; + +struct DeviceTypeClauseOps { + mlir::omp::DeclareTargetDeviceType deviceType; +}; + +struct DistScheduleClauseOps { + mlir::UnitAttr distScheduleStaticAttr; + mlir::Value distScheduleChunkSizeVar; +}; + +struct EnterLinkToClauseOps { + llvm::SmallVector symbolAndClause; +}; + +struct FinalClauseOps { + mlir::Value finalVar; +}; + +struct GrainsizeClauseOps { + mlir::Value grainsizeVar; +}; + +struct HintClauseOps { + mlir::IntegerAttr hintAttr; +}; + +struct IfClauseOps { + mlir::Value ifVar; +}; + +struct InReductionClauseOps { + llvm::SmallVector inReductionVars; + llvm::SmallVector inReductionTypes; + llvm::SmallVector inReductionDeclSymbols; + std::optional> + inReductionSymbols; +}; + +struct LinearClauseOps { + llvm::SmallVector linearVars, linearStepVars; +}; + +// The optional parameters - mapSymTypes, mapSymLocs & mapSymbols are used to +// store the original type, location and Fortran symbol for the map operands. +// They may be used later on to create the block_arguments for some of the +// target directives that require it. 
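The MapClauseOps struct that the comment above describes follows immediately after this aside. As a rough illustration of how those optional members would be engaged, with the helper name and the element types (mlir::Type, mlir::Location, Fortran::semantics::Symbol pointers) being assumptions drawn from the comment rather than from the patch itself:

// Hypothetical helper, for illustration only.
static void addMapOperand(MapClauseOps &result, mlir::Value mapOp,
                          mlir::Type varType, mlir::Location varLoc,
                          const Fortran::semantics::Symbol &sym) {
  // Every map clause contributes a map operand to the op being built.
  result.mapVars.push_back(mapOp);
  // The bookkeeping vectors are optional; callers that later need block
  // arguments (e.g. for TARGET) engage them before clause processing.
  if (result.mapSymTypes)
    result.mapSymTypes->push_back(varType);
  if (result.mapSymLocs)
    result.mapSymLocs->push_back(varLoc);
  if (result.mapSymbols)
    result.mapSymbols->push_back(&sym);
}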
+struct MapClauseOps { + llvm::SmallVector mapVars; + std::optional> mapSymTypes; + std::optional> mapSymLocs; + std::optional> + mapSymbols; +}; + +struct MergeableClauseOps { + mlir::UnitAttr mergeableAttr; +}; + +struct NogroupClauseOps { + mlir::UnitAttr nogroupAttr; +}; + +struct NontemporalClauseOps { + llvm::SmallVector nontemporalVars; +}; + +struct NowaitClauseOps { + mlir::UnitAttr nowaitAttr; +}; + +struct NumTasksClauseOps { + mlir::Value numTasksVar; +}; + +struct NumTeamsClauseOps { + mlir::Value numTeamsLowerVar; + mlir::Value numTeamsUpperVar; +}; + +struct NumThreadsClauseOps { + mlir::Value numThreadsVar; +}; + +struct OrderClauseOps { + mlir::omp::ClauseOrderKindAttr orderAttr; +}; + +struct OrderedClauseOps { + mlir::IntegerAttr orderedAttr; +}; + +struct ParallelizationLevelClauseOps { + mlir::UnitAttr parLevelThreadsAttr; + mlir::UnitAttr parLevelSimdAttr; +}; + +struct PriorityClauseOps { + mlir::Value priorityVar; +}; + +struct PrivateClauseOps { + llvm::SmallVector privateVars; + llvm::SmallVector privatizers; +}; + +struct ProcBindClauseOps { + mlir::omp::ClauseProcBindKindAttr procBindKindAttr; +}; + +struct ReductionClauseOps { + llvm::SmallVector reductionVars; + llvm::SmallVector reductionTypes; + llvm::SmallVector reductionDeclSymbols; + std::optional> + reductionSymbols; +}; + +struct SafelenClauseOps { + mlir::IntegerAttr safelenAttr; +}; + +struct ScheduleClauseOps { + mlir::omp::ClauseScheduleKindAttr scheduleValAttr; + mlir::omp::ScheduleModifierAttr scheduleModAttr; + mlir::Value scheduleChunkVar; + mlir::UnitAttr scheduleSimdAttr; +}; + +struct SimdlenClauseOps { + mlir::IntegerAttr simdlenAttr; +}; + +struct TargetReductionClauseOps { + llvm::SmallVector targetReductionSymbols; +}; + +struct TaskReductionClauseOps { + llvm::SmallVector taskReductionVars; + llvm::SmallVector taskReductionTypes; + llvm::SmallVector taskReductionDeclSymbols; + std::optional> + taskReductionSymbols; +}; + +struct ThreadLimitClauseOps { + mlir::Value threadLimitVar; +}; + +struct UntiedClauseOps { + mlir::UnitAttr untiedAttr; +}; + +struct UseDeviceClauseOps { + llvm::SmallVector useDevicePtrVars; + llvm::SmallVector useDeviceAddrVars; + llvm::SmallVector useDeviceTypes; + llvm::SmallVector useDeviceLocs; + llvm::SmallVector useDeviceSymbols; +}; + +//===----------------------------------------------------------------------===// +// Structures defining clause operands associated with each OpenMP leaf +// construct. +// +// These mirror the arguments expected by the corresponding OpenMP MLIR ops. +//===----------------------------------------------------------------------===// + +namespace detail { +template +struct Clauses : public Mixins... {}; +} // namespace detail + +using CriticalDeclareOpClauseOps = detail::Clauses; + +using DataOpClauseOps = detail::Clauses; + +using DeclareTargetOpClauseOps = detail::Clauses; + +using DistributeOpClauseOps = + detail::Clauses; + +using EnterExitUpdateDataOpClauseOps = + detail::Clauses; + +using LoopNestOpClauseOps = detail::Clauses; + +// TODO Rename to "masked" +// TODO `filter` clause. +using MasterOpClauseOps = detail::Clauses<>; + +using OrderedRegionOpClauseOps = detail::Clauses; + +using ParallelOpClauseOps = + detail::Clauses; + +using SectionsOpClauseOps = + detail::Clauses; + +// TODO `linear` clause. +using SimdLoopOpClauseOps = + detail::Clauses; + +using SingleOpClauseOps = + detail::Clauses; + +// TODO `allocate`, `defaultmap`, `has_device_addr`, `in_reduction`, +// `is_device_ptr`, `uses_allocators` clauses. 
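Before the remaining per-construct aliases, here is a stand-alone sketch of the mixin pattern that the detail::Clauses template above relies on, using simplified stand-ins rather than the real clause structs:

#include <cassert>
#include <vector>

// Simplified stand-ins for the real *ClauseOps mixins.
struct DemoAllocateClauseOps { std::vector<int> allocateVars; };
struct DemoNowaitClauseOps { bool nowaitAttr = false; };

namespace demo {
template <typename... Mixins>
struct Clauses : public Mixins... {};
} // namespace demo

// A per-construct operand bundle is just a named combination of mixins: a
// clause processor fills it member by member, and the op builder reads
// exactly the fields that construct supports.
using DemoSingleOpClauseOps =
    demo::Clauses<DemoAllocateClauseOps, DemoNowaitClauseOps>;

int main() {
  DemoSingleOpClauseOps ops;
  ops.allocateVars.push_back(1); // inherited from DemoAllocateClauseOps
  ops.nowaitAttr = true;         // inherited from DemoNowaitClauseOps
  assert(ops.allocateVars.size() == 1 && ops.nowaitAttr);
  return 0;
}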
+using TargetOpClauseOps = + detail::Clauses; + +using TaskGroupOpClauseOps = + detail::Clauses; + +using TaskLoopOpClauseOps = + detail::Clauses; + +// TODO `affinity`, `detach` clauses. +using TaskOpClauseOps = + detail::Clauses; + +// TODO `depend`, `nowait` clauses. +using TaskWaitOpClauseOps = detail::Clauses<>; + +using TeamsOpClauseOps = + detail::Clauses; + +// TODO `allocate` clause. +using WsloopOpClauseOps = + detail::Clauses; + +} // namespace omp +} // namespace lower +} // namespace Fortran diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index 3b72148867874..2946d3ef2e6e1 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -60,7 +60,7 @@ ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType( void ReductionProcessor::addReductionSym( const Fortran::parser::OmpReductionClause &reduction, - llvm::SmallVector &symbols) { + llvm::SmallVectorImpl &symbols) { const auto &objectList{std::get(reduction.t)}; for (const Fortran::parser::OmpObject &ompObject : objectList.v) { diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/lib/Lower/OpenMP/ReductionProcessor.h index 85c286ead5282..abbd737084dc5 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.h +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.h @@ -101,7 +101,7 @@ class ReductionProcessor { static void addReductionSym( const Fortran::parser::OmpReductionClause &reduction, - llvm::SmallVector &symbols); + llvm::SmallVectorImpl &symbols); /// Creates an OpenMP reduction declaration and inserts it into the provided /// symbol table. The declaration has a constant initializer with the neutral diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index d1bf73ba1dfd2..2858412d77561 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -67,6 +68,24 @@ void genObjectList(const Fortran::parser::OmpObjectList &objectList, } } +mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, + std::size_t loopVarTypeSize) { + // OpenMP runtime requires 32-bit or 64-bit loop variables. 
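A quick worked example of the clamping that follows (illustrative values, not taken from the patch): a Fortran INTEGER(KIND=2) loop variable has loopVarTypeSize = 2 bytes, i.e. 16 bits, and is widened to i32; an INTEGER(KIND=16) variable has 128 bits and is narrowed to i64, which also emits the warning below.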
+ loopVarTypeSize = loopVarTypeSize * 8; + if (loopVarTypeSize < 32) { + loopVarTypeSize = 32; + } else if (loopVarTypeSize > 64) { + loopVarTypeSize = 64; + mlir::emitWarning(converter.getCurrentLocation(), + "OpenMP loop iteration variable cannot have more than 64 " + "bits size and will be narrowed into 64 bits."); + } + assert((loopVarTypeSize == 32 || loopVarTypeSize == 64) && + "OpenMP loop iteration variable size must be transformed into 32-bit " + "or 64-bit"); + return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); +} + void gatherFuncAndVarSyms( const Fortran::parser::OmpObjectList &objList, mlir::omp::DeclareTargetCaptureClause clause, diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 00b0165cec554..369654e546290 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -68,6 +68,9 @@ void genObjectList(const Fortran::parser::OmpObjectList &objectList, Fortran::lower::AbstractConverter &converter, llvm::SmallVectorImpl &operands); +mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, + std::size_t loopVarTypeSize); + mlir::omp::TargetOp findParentTargetOp(mlir::OpBuilder &builder); } // namespace omp diff --git a/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp b/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp index f5a3d925ab5d9..e242dc5df6916 100644 --- a/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp @@ -105,9 +105,12 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { step.push_back(rewriter.clone(*stepOp)->getResult(0)); // ==== TODO (1) End ==== - auto wsLoopOp = rewriter.create( - doLoop.getLoc(), lowerBound, upperBound, step); - wsLoopOp.setInclusive(true); + auto wsLoopOp = rewriter.create(doLoop.getLoc()); + rewriter.createBlock(&wsLoopOp.getRegion()); + + // TODO Test that this didn't break something. + auto loopNestOp = rewriter.create( + doLoop.getLoc(), lowerBound, upperBound, step, /*inclusive=*/true); auto outlineableOp = mlir::dyn_cast(*parallelOp); @@ -180,11 +183,11 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { // Clone the loop's body inside the worksharing construct using the mapped // memref values. - rewriter.cloneRegionBefore(doLoop.getRegion(), wsLoopOp.getRegion(), - wsLoopOp.getRegion().begin(), mapper); + rewriter.cloneRegionBefore(doLoop.getRegion(), loopNestOp.getRegion(), + loopNestOp.getRegion().begin(), mapper); - mlir::Operation *terminator = wsLoopOp.getRegion().back().getTerminator(); - rewriter.setInsertionPointToEnd(&wsLoopOp.getRegion().back()); + mlir::Operation *terminator = loopNestOp.getRegion().back().getTerminator(); + rewriter.setInsertionPointToEnd(&loopNestOp.getRegion().back()); rewriter.create(terminator->getLoc()); rewriter.eraseOp(terminator); diff --git a/flang/test/Lower/OpenMP/FIR/copyin.f90 b/flang/test/Lower/OpenMP/FIR/copyin.f90 index 20023a81977ae..7161722b21090 100644 --- a/flang/test/Lower/OpenMP/FIR/copyin.f90 +++ b/flang/test/Lower/OpenMP/FIR/copyin.f90 @@ -145,7 +145,8 @@ subroutine copyin_derived_type() ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_4]] : !fir.ref ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! 
CHECK: fir.store %[[VAL_9]] to %[[VAL_3]] : !fir.ref ! CHECK: fir.call @_QPsub4(%[[VAL_4]]) {{.*}}: (!fir.ref) -> () ! CHECK: omp.yield @@ -286,7 +287,8 @@ subroutine common_1() !CHECK: %[[val_c1_i32:.*]] = arith.constant 1 : i32 !CHECK: %[[val_19:.*]] = fir.load %[[val_13]] : !fir.ref !CHECK: %[[val_c1_i32_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_19]]) inclusive step (%[[val_c1_i32_2]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_19]]) inclusive step (%[[val_c1_i32_2]]) { !CHECK: fir.store %[[arg]] to %[[val_9]] : !fir.ref !CHECK: %[[val_20:.*]] = fir.load %[[val_16]] : !fir.ref !CHECK: %[[val_21:.*]] = fir.load %[[val_9]] : !fir.ref @@ -303,7 +305,7 @@ subroutine common_2() integer :: y common /d/ x, y !$omp threadprivate(/d/) - + !$omp parallel do copyin(/d/) do i = 1, x y = y + i diff --git a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 index 389bcba35f77f..f779fc9a775cb 100644 --- a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 +++ b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 @@ -1,4 +1,4 @@ -! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s !CHECK: func.func @_QPlastprivate_common() { !CHECK: %[[val_0:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} @@ -17,7 +17,8 @@ !CHECK: %[[val_c1_i32:.*]] = arith.constant 1 : i32 !CHECK: %[[val_c100_i32:.*]] = arith.constant 100 : i32 !CHECK: %[[val_c1_i32_0:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_c100_i32]]) inclusive step (%[[val_c1_i32_0]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_c100_i32]]) inclusive step (%[[val_c1_i32_0]]) { !CHECK: fir.store %[[arg]] to %[[val_0]] : !fir.ref !CHECK: %[[val_11:.*]] = arith.addi %[[arg]], %[[val_c1_i32_0]] : i32 !CHECK: %[[val_c0_i32:.*]] = arith.constant 0 : i32 diff --git a/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 index 2060e2062c1a3..68867a0bd4149 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 @@ -9,16 +9,17 @@ !CHECK-DAG: %[[ARG1_REF:.*]] = fir.convert %[[ARG1_UNBOX]]#0 : (!fir.ref>) -> !fir.ref> !CHECK: omp.parallel { -!CHECK-DAG: %[[ARG1_PVT:.*]] = fir.alloca !fir.char<1,5> {bindc_name = "arg1", +!CHECK-DAG: %[[ARG1_PVT:.*]] = fir.alloca !fir.char<1,5> {bindc_name = "arg1", ! 
Check that we are accessing the clone inside the loop -!CHECK-DAG: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK-DAG: omp.wsloop { +!CHECK-DAG: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !CHECK-DAG: %[[UNIT:.*]] = arith.constant 6 : i32 !CHECK-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQclX -!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] +!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] !CHECK-NEXT: %[[CNST:.*]] = arith.constant !CHECK-NEXT: %[[CALL_BEGIN_IO:.*]] = fir.call @_FortranAioBeginExternalListOutput(%[[UNIT]], %[[CVT0]], %[[CNST]]) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref -!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT]] +!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT]] !CHECK-NEXT: %[[CVT_0_2:.*]] = fir.convert %[[FIVE]] !CHECK-NEXT: %[[CALL_OP_ASCII:.*]] = fir.call @_FortranAioOutputAscii(%[[CALL_BEGIN_IO]], %[[CVT_0_1]], %[[CVT_0_2]]) !CHECK-NEXT: %[[CALL_END_IO:.*]] = fir.call @_FortranAioEndIoStatement(%[[CALL_BEGIN_IO]]) @@ -37,12 +38,12 @@ !CHECK-DAG: %[[CVT:.*]] = fir.convert %[[ARG1_REF]] : (!fir.ref>) -> !fir.ref !CHECK-DAG: %[[CVT1:.*]] = fir.convert %[[ARG1_PVT]] : (!fir.ref>) -> !fir.ref !CHECK-DAG: fir.call @llvm.memmove.p0.p0.i64(%[[CVT]], %[[CVT1]]{{.*}}) -!CHECK-DAG: } +!CHECK-DAG: } !CHECK-DAG: omp.yield subroutine lastprivate_character(arg1) character(5) :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1(n:n) = 'c' @@ -55,7 +56,8 @@ subroutine lastprivate_character(arg1) !CHECK: func @_QPlastprivate_int(%[[ARG1:.*]]: !fir.ref {fir.bindc_name = "arg1"}) { !CHECK-DAG: omp.parallel { !CHECK-DAG: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "arg1" -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -75,7 +77,7 @@ subroutine lastprivate_character(arg1) subroutine lastprivate_int(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 @@ -90,7 +92,8 @@ subroutine lastprivate_int(arg1) !CHECK: omp.parallel { !CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1" !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -111,7 +114,7 @@ subroutine lastprivate_int(arg1) subroutine mult_lastprivate_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -127,7 +130,8 @@ subroutine mult_lastprivate_int(arg1, arg2) !CHECK: omp.parallel { !CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1" !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -148,7 +152,7 @@ subroutine mult_lastprivate_int(arg1, arg2) subroutine mult_lastprivate_int2(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1, arg2) do n = 1, 5 arg1 = 2 @@ -169,7 +173,8 @@ subroutine mult_lastprivate_int2(arg1, arg2) ! 
Lastprivate Allocation !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK-NOT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -188,7 +193,7 @@ subroutine mult_lastprivate_int2(arg1, arg2) subroutine firstpriv_lastpriv_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -207,7 +212,8 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) !CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1]] : !fir.ref !CHECK-NEXT: fir.store %[[FPV_LD]] to %[[CLONE1]] : !fir.ref !CHECK-NEXT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 @@ -225,7 +231,7 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) subroutine firstpriv_lastpriv_int2(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 index c99bf761333b8..ce46925327ba1 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 @@ -13,7 +13,8 @@ ! CHECK: %[[ONE:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_4:.*]] : !fir.ref ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[PRIV_I]] : !fir.ref ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 index 8f5d280943cc2..204909e30cb96 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 @@ -259,7 +259,8 @@ subroutine simple_loop_1 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref @@ -295,7 +296,8 @@ subroutine simple_loop_2 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref @@ -330,7 +332,8 @@ subroutine simple_loop_3 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! 
FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 index 6eb39a2f63725..b26e618693316 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 @@ -3,7 +3,7 @@ ! RUN: bbc -fopenmp -emit-fir -hlfir=false %s -o - | FileCheck %s -! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) +! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) subroutine omp_do_firstprivate(a) integer::a integer::n @@ -17,7 +17,8 @@ subroutine omp_do_firstprivate(a) ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK-NEXT: omp.wsloop for (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: omp.wsloop { + ! CHECK-NEXT: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK-NEXT: fir.store %[[ARG1]] to %[[REF]] : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK-NEXT: omp.yield @@ -29,7 +30,7 @@ subroutine omp_do_firstprivate(a) call bar(a) end subroutine omp_do_firstprivate -! CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) +! CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) subroutine omp_do_firstprivate2(a, n) integer::a integer::n @@ -48,7 +49,8 @@ subroutine omp_do_firstprivate2(a, n) ! CHECK: %[[LB:.*]] = fir.load %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE1]] : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK-NEXT: omp.wsloop for (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: omp.wsloop { + ! CHECK-NEXT: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK-NEXT: fir.store %[[ARG2]] to %[[REF]] : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK-NEXT: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 index 8649cf284ffd9..b1003253822da 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 @@ -9,7 +9,8 @@ subroutine simple_parallel_do ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -35,7 +36,8 @@ subroutine parallel_do_with_parallel_clauses(cond, nt) ! 
CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -58,7 +60,8 @@ subroutine parallel_do_with_clauses(nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop schedule(dynamic) { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -90,7 +93,8 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -140,7 +144,8 @@ end subroutine parallel_private_do ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV]] : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -182,7 +187,8 @@ end subroutine omp_parallel_multiple_firstprivate_do ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield @@ -224,7 +230,8 @@ end subroutine parallel_do_private ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV_ADDR]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! 
CHECK: omp.yield @@ -266,7 +273,8 @@ end subroutine omp_parallel_do_multiple_firstprivate ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 index d6c10bdee88d5..2c6f460ceacac 100644 --- a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 +++ b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 @@ -77,7 +77,8 @@ subroutine test_stop_in_region3() ! CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[VAL_0]] : !fir.ref ! CHECK: cf.br ^bb1 ! CHECK: ^bb1: diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90 index 3962603572ba0..9e113e2e59d8c 100644 --- a/flang/test/Lower/OpenMP/FIR/target.f90 +++ b/flang/test/Lower/OpenMP/FIR/target.f90 @@ -487,7 +487,8 @@ subroutine omp_target_parallel_do !CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 !CHECK: %[[VAL_6:.*]] = arith.constant 1024 : i32 !CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 - !CHECK: omp.wsloop for (%[[VAL_8:.*]]) : i32 = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_7]]) { + !CHECK: omp.wsloop { + !CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_7]]) { !CHECK: fir.store %[[VAL_8]] to %[[VAL_4]] : !fir.ref !CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 !CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_4]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/unstructured.f90 b/flang/test/Lower/OpenMP/FIR/unstructured.f90 index bfaf38b7ef1af..390b3a8746e8b 100644 --- a/flang/test/Lower/OpenMP/FIR/unstructured.f90 +++ b/flang/test/Lower/OpenMP/FIR/unstructured.f90 @@ -67,14 +67,16 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct ! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 ! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 ! CHECK: ^bb2: // pred: ^bb1 -! CHECK: omp.wsloop for (%[[ARG1:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG1]] to %[[ALLOCA_2]] : !fir.ref ! CHECK: @_FortranAioBeginExternalListOutput ! CHECK: %[[LOAD_1:.*]] = fir.load %[[ALLOCA_2]] : !fir.ref ! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]]) ! CHECK: omp.yield ! CHECK: } -! CHECK: omp.wsloop for (%[[ARG2:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG2]] to %[[ALLOCA_1]] : !fir.ref ! CHECK: br ^bb1 ! CHECK: ^bb2: // 2 preds: ^bb1, ^bb5 @@ -117,7 +119,8 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs ! CHECK-LABEL: func @_QPss4{{.*}} { ! CHECK: omp.parallel { ! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned} -! 
CHECK: omp.wsloop for (%[[ARG:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG]] to %[[ALLOCA]] : !fir.ref ! CHECK: %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}} ! CHECK: %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}} diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 index 4030f46299d0b..4b6498adb31de 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 @@ -19,7 +19,8 @@ program wsloop ! CHECK: %[[VAL_3:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait for (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV:.*]] : !fir.ref ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]] : !fir.ref ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 @@ -37,7 +38,8 @@ program wsloop ! CHECK: %[[VAL_15:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_17:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait for (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref ! CHECK: %[[VAL_24:.*]] = arith.constant 2 : i32 ! CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref @@ -45,7 +47,7 @@ program wsloop ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: omp.yield ! CHECK: } - + end do !$OMP END DO NOWAIT chunk = 6 @@ -61,7 +63,8 @@ program wsloop ! CHECK: %[[VAL_30:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_31:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_0]] : !fir.ref -! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait for (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { ! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref ! CHECK: %[[VAL_39:.*]] = arith.constant 3 : i32 ! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 index 933fc0910e338..aa66b49edf9ec 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 @@ -39,7 +39,8 @@ program wsloop_collapse do i = 1, a do j= 1, b do k = 1, c -! CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) { +! CHECK: omp.wsloop { +! 
CHECK: omp.loopnest (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) { ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref ! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 index 1c381475f6cbb..70f0e7d00b4f9 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 @@ -15,7 +15,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 index 3f425200b8fa4..9170a75b8248e 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 @@ -16,7 +16,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 index 7548d7a597228..9b3daba2f170b 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 @@ -6,7 +6,8 @@ subroutine wsloop_ordered_no_para() integer :: a(10), i -! CHECK: omp.wsloop ordered(0) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(0) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } @@ -25,7 +26,8 @@ subroutine wsloop_ordered_with_para() integer :: a(10), i ! CHECK: func @_QPwsloop_ordered_with_para() { -! CHECK: omp.wsloop ordered(1) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(1) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 index 5664529416fe8..a620cd4852965 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 @@ -61,7 +61,8 @@ ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 -! 
CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { ! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref @@ -97,7 +98,8 @@ subroutine simple_int_reduction ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { ! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref @@ -133,7 +135,8 @@ subroutine simple_real_reduction ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { ! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_7]] : !fir.ref @@ -210,7 +213,8 @@ subroutine simple_real_reduction_switch_order ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) for (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref @@ -263,7 +267,8 @@ subroutine multiple_int_reductions_same_type ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! 
CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) for (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref @@ -322,7 +327,8 @@ subroutine multiple_real_reductions_same_type ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_2]] -> %[[VAL_13:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_3]] -> %[[VAL_14:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_4]] -> %[[VAL_15:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_1]] -> %[[VAL_16:.*]] : !fir.ref) for (%[[VAL_17:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_2]] -> %[[VAL_13:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_3]] -> %[[VAL_14:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_4]] -> %[[VAL_15:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_1]] -> %[[VAL_16:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_17:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { ! CHECK: fir.store %[[VAL_17]] to %[[VAL_9]] : !fir.ref ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_13]] : !fir.ref ! 
CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_9]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 index 9ce1725dbab04..c5401204c0ca6 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 @@ -13,7 +13,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iandEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[IAND_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IAND_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 index f6027416246af..7775bb4c2dc97 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 @@ -13,7 +13,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_ieorEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 index bc143611abe8d..cef02492931c1 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 @@ -10,10 +10,11 @@ !CHECK: omp.yield(%[[IOR_VAL_I]] : i32) !CHECK-LABEL: @_QPreduction_ior -!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> +!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iorEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[IOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 index d5aacd74d8b10..7592da1a8844d 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 @@ -30,7 +30,8 @@ ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! 
CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref> ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref @@ -72,7 +73,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64 @@ -122,7 +124,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) for (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref> ! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 index 9f44e0e26d407..b8cdf4bf46c7f 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 @@ -31,7 +31,8 @@ ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref> ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref @@ -73,7 +74,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! 
CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref
 ! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
@@ -123,7 +125,8 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) for (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) {
+! CHECK: omp.loopnest (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
 ! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref
 ! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref>
 ! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90
index af79658491b56..85702c5dc020d 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90
@@ -21,7 +21,8 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box>
 !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"}
 !CHECK: omp.parallel
-!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for
+!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) {
+!CHECK: omp.loopnest
 !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref
 !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
 !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref
@@ -34,7 +35,8 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box>
 !CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_max_realEx"}
 !CHECK: omp.parallel
-!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for
+!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) {
+!CHECK: omp.loopnest
 !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref
 !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
 !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90
index 1095718b4b13f..b82d943009e90 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90
@@ -21,7 +21,8 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box>
 !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_min_intEx"}
 !CHECK: omp.parallel
-!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for
+!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) {
+!CHECK: omp.loopnest
 !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref
 !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
 !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref
@@ -35,7 +36,8 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box>
 !CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_min_realEx"}
 !CHECK: omp.parallel
-!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for
+!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) {
+!CHECK: omp.loopnest
 !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref
 !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
 !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90
index 2e3f8ca3c207d..446ad4279a682 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90
@@ -14,7 +14,8 @@ program wsloop_dynamic
 !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
-!CHECK: omp.wsloop schedule(runtime, simd) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+!CHECK: omp.wsloop schedule(runtime, simd) nowait {
+!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
 !CHECK: fir.store %[[I]] to %[[STORE:.*]] : !fir.ref
 do i=1, 9
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90
index 4f34f30f3e7c9..f00b7ec0979cb 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90
@@ -22,7 +22,8 @@ program wsloop_variable
 !CHECK: %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64
 !CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64
 !CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64
-!CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
+!CHECK: omp.wsloop {
+!CHECK: omp.loopnest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
 !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16
 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]] : !fir.ref
 !CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref
@@ -46,7 +47,8 @@ program wsloop_variable
 !CHECK: %[[TMP12:.*]] = arith.constant 1 : i32
 !CHECK: %[[TMP13:.*]] = fir.convert %{{.*}} : (i8) -> i32
 !CHECK: %[[TMP14:.*]] = fir.convert %{{.*}} : (i64) -> i32
-!CHECK: omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) {
+!CHECK: omp.wsloop {
+!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) {
 !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16
 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE3:.*]] : !fir.ref
 !CHECK: %[[LOAD3:.*]] = fir.load %[[STORE3]] : !fir.ref
@@ -65,7 +67,8 @@ program wsloop_variable
 !CHECK: %[[TMP17:.*]] = fir.convert %{{.*}} : (i8) -> i64
 !CHECK: %[[TMP18:.*]] = fir.convert %{{.*}} : (i16) -> i64
 !CHECK: %[[TMP19:.*]] = fir.convert %{{.*}} : (i32) -> i64
-!CHECK: omp.wsloop for (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) {
+!CHECK: omp.wsloop {
+!CHECK: omp.loopnest (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) {
 !CHECK: %[[ARG1_I128:.*]] = fir.convert %[[ARG1]] : (i64) -> i128
 !CHECK: fir.store %[[ARG1_I128]] to %[[STORE4:.*]] : !fir.ref
 !CHECK: %[[LOAD4:.*]] = fir.load %[[STORE4]] : !fir.ref
@@ -97,7 +100,8 @@ end program wsloop_variable
 !CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref
 !CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_8]] : (i8) -> i32
 !CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_9]] : (i16) -> i32
-!CHECK: omp.wsloop for
(%[[ARG0:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE_IV:.*]] : !fir.ref !CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_0]] : !fir.ref @@ -146,7 +150,8 @@ subroutine wsloop_variable_sub !CHECK: %[[C1:.*]] = arith.constant 1 : i32 !CHECK: %[[C10:.*]] = arith.constant 10 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[C1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[C1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) { !CHECK: %[[ARG0_I8:.*]] = fir.convert %[[ARG0]] : (i32) -> i8 !CHECK: fir.store %[[ARG0_I8]] to %[[IV2]] : !fir.ref !CHECK: %[[IV2LOAD:.*]] = fir.load %[[IV2]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop.f90 b/flang/test/Lower/OpenMP/FIR/wsloop.f90 index abc0489b08ff5..66972c73d2eb0 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop.f90 @@ -11,7 +11,8 @@ subroutine simple_loop ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref @@ -34,7 +35,8 @@ subroutine simple_loop_with_step ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 2 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref !$OMP DO @@ -57,7 +59,8 @@ subroutine loop_with_schedule_nowait ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop schedule(runtime) nowait { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO SCHEDULE(runtime) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 index 5c624d31b5f36..4c084d81ffa89 100644 --- a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 +++ b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 @@ -12,7 +12,8 @@ ! CHECK: %[[const_1:.*]] = arith.constant 1 : i32 ! CHECK: %[[const_2:.*]] = arith.constant 10 : i32 ! CHECK: %[[const_3:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[ARG:.*]]) : i32 = (%[[const_1]]) to (%[[const_2]]) inclusive step (%[[const_3]]) { +! CHECK: omp.wsloop { +! 
CHECK: omp.loopnest (%[[ARG:.*]]) : i32 = (%[[const_1]]) to (%[[const_2]]) inclusive step (%[[const_3]]) { ! CHECK: fir.store %[[ARG]] to %[[TEMP]] : !fir.ref ! EXPECTED: %[[temp_1:.*]] = fir.load %[[PRIVATE_Z]] : !fir.ref ! CHECK: %[[temp_1:.*]] = fir.load %{{.*}} : !fir.ref diff --git a/flang/test/Lower/OpenMP/default-clause.f90 b/flang/test/Lower/OpenMP/default-clause.f90 index 0e118742689d6..401ed4f612467 100644 --- a/flang/test/Lower/OpenMP/default-clause.f90 +++ b/flang/test/Lower/OpenMP/default-clause.f90 @@ -192,7 +192,7 @@ subroutine nested_default_clause_tests !CHECK: %[[INNER_PRIVATE_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFnested_default_clause_testsEk"} !CHECK: %[[INNER_PRIVATE_K_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_K]] {uniq_name = "_QFnested_default_clause_testsEk"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_K_DECL]]#0 : !fir.ref -!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_K_DECL]]#0 temporary_lhs : i32, !fir.ref +!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_K_DECL]]#0 temporary_lhs : i32, !fir.ref !CHECK: %[[CONST:.*]] = arith.constant 30 : i32 !CHECK: hlfir.assign %[[CONST]] to %[[PRIVATE_Y_DECL]]#0 : i32, !fir.ref !CHECK: %[[CONST:.*]] = arith.constant 40 : i32 @@ -205,21 +205,21 @@ subroutine nested_default_clause_tests !CHECK: } !CHECK: omp.terminator !CHECK: } - !$omp parallel firstprivate(x) private(y) shared(w) default(private) + !$omp parallel firstprivate(x) private(y) shared(w) default(private) !$omp parallel default(private) y = 20 - x = 10 - !$omp end parallel + x = 10 + !$omp end parallel - !$omp parallel default(firstprivate) shared(y) private(w) + !$omp parallel default(firstprivate) shared(y) private(w) y = 30 - w = 40 + w = 40 z = 50 k = 40 !$omp end parallel !$omp end parallel - - + + !CHECK: omp.parallel { !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"} @@ -260,8 +260,8 @@ subroutine nested_default_clause_tests !$omp parallel default(private) shared(z) w = x + z !$omp end parallel - !$omp end parallel - + !$omp end parallel + !CHECK: omp.parallel { !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"} !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -319,7 +319,7 @@ subroutine nested_default_clause_tests !CHECK: omp.terminator !CHECK: } !CHECK: return -!CHECK: } +!CHECK: } !$omp parallel default(firstprivate) !$omp single x = y @@ -352,7 +352,8 @@ subroutine skipped_default_clause_checks() type(it)::iii !CHECK: omp.parallel { -!CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref) for (%[[ARG:.*]]) {{.*}} { +!CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest (%[[ARG:.*]]) {{.*}} { !CHECK: omp.yield !CHECK: } !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/hlfir-wsloop.f90 b/flang/test/Lower/OpenMP/hlfir-wsloop.f90 index b6be77fe3016d..307be89fb5ecb 100644 --- a/flang/test/Lower/OpenMP/hlfir-wsloop.f90 +++ b/flang/test/Lower/OpenMP/hlfir-wsloop.f90 @@ -12,7 +12,8 @@ subroutine simple_loop !$OMP PARALLEL ! CHECK-DAG: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} ! 
CHECK: %[[IV:.*]] = fir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref) -> !fir.ref - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_ST]]) to (%[[WS_END]]) inclusive step (%[[WS_ST]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_ST]]) to (%[[WS_END]]) inclusive step (%[[WS_ST]]) !$OMP DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV:.*]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 index a11bdee156637..45450a5a8fd9b 100644 --- a/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 +++ b/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 @@ -15,7 +15,8 @@ !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X_REF]] {uniq_name = "_QFlastprivate_commonEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[PRIVATE_Y_REF:.*]] = fir.alloca f32 {bindc_name = "y", pinned, uniq_name = "_QFlastprivate_commonEy"} !CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y_REF]] {uniq_name = "_QFlastprivate_commonEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { !CHECK: %[[V:.*]] = arith.addi %[[I]], %{{.*}} : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 !CHECK: %[[NEG_STEP:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32 diff --git a/flang/test/Lower/OpenMP/lastprivate-iv.f90 b/flang/test/Lower/OpenMP/lastprivate-iv.f90 index 70fe500129d12..16204ed79b019 100644 --- a/flang/test/Lower/OpenMP/lastprivate-iv.f90 +++ b/flang/test/Lower/OpenMP/lastprivate-iv.f90 @@ -9,7 +9,8 @@ !CHECK: %[[LB:.*]] = arith.constant 4 : i32 !CHECK: %[[UB:.*]] = arith.constant 10 : i32 !CHECK: %[[STEP:.*]] = arith.constant 3 : i32 -!CHECK: omp.wsloop for (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { !CHECK: fir.store %[[IV]] to %[[I]]#1 : !fir.ref !CHECK: %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 @@ -41,7 +42,8 @@ subroutine lastprivate_iv_inc() !CHECK: %[[LB:.*]] = arith.constant 10 : i32 !CHECK: %[[UB:.*]] = arith.constant 1 : i32 !CHECK: %[[STEP:.*]] = arith.constant -3 : i32 -!CHECK: omp.wsloop for (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { !CHECK: fir.store %[[IV]] to %[[I]]#1 : !fir.ref !CHECK: %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 diff --git a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 index 28f59c95d60bb..cf4f028987022 100644 --- a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 +++ b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 @@ -14,13 +14,14 @@ !CHECK-DAG: %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] typeparams %[[FIVE]] {uniq_name = "_QFlastprivate_characterEarg1"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) ! 
Check that we are accessing the clone inside the loop -!CHECK-DAG: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK-DAG: omp.wsloop { +!CHECK-DAG: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !CHECK-DAG: %[[UNIT:.*]] = arith.constant 6 : i32 !CHECK-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQclX -!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] +!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] !CHECK-NEXT: %[[CNST:.*]] = arith.constant !CHECK-NEXT: %[[CALL_BEGIN_IO:.*]] = fir.call @_FortranAioBeginExternalListOutput(%[[UNIT]], %[[CVT0]], %[[CNST]]) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref -!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT_DECL]]#1 +!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT_DECL]]#1 !CHECK-NEXT: %[[CVT_0_2:.*]] = fir.convert %[[FIVE]] !CHECK-NEXT: %[[CALL_OP_ASCII:.*]] = fir.call @_FortranAioOutputAscii(%[[CALL_BEGIN_IO]], %[[CVT_0_1]], %[[CVT_0_2]]) !CHECK-NEXT: %[[CALL_END_IO:.*]] = fir.call @_FortranAioEndIoStatement(%[[CALL_BEGIN_IO]]) @@ -37,12 +38,12 @@ ! Testing lastprivate val update !CHECK-DAG: hlfir.assign %[[ARG1_PVT_DECL]]#0 to %[[ARG1_DECL]]#0 temporary_lhs : !fir.ref>, !fir.ref> -!CHECK-DAG: } +!CHECK-DAG: } !CHECK-DAG: omp.yield subroutine lastprivate_character(arg1) character(5) :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1(n:n) = 'c' @@ -57,7 +58,8 @@ subroutine lastprivate_character(arg1) !CHECK-DAG: omp.parallel { !CHECK-DAG: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "arg1" !CHECK-DAG: %[[CLONE_DECL:.*]]:2 = hlfir.declare %[[CLONE]] {uniq_name = "_QFlastprivate_intEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -77,7 +79,7 @@ subroutine lastprivate_character(arg1) subroutine lastprivate_int(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 @@ -96,7 +98,8 @@ subroutine lastprivate_int(arg1) !CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_intEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK-DAG: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFmult_lastprivate_intEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! 
Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -117,7 +120,7 @@ subroutine lastprivate_int(arg1) subroutine mult_lastprivate_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -137,7 +140,8 @@ subroutine mult_lastprivate_int(arg1, arg2) !CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_int2Earg1"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK-DAG: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFmult_lastprivate_int2Earg2"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -158,7 +162,7 @@ subroutine mult_lastprivate_int(arg1, arg2) subroutine mult_lastprivate_int2(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1, arg2) do n = 1, 5 arg1 = 2 @@ -183,7 +187,8 @@ subroutine mult_lastprivate_int2(arg1, arg2) !CHECK: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFfirstpriv_lastpriv_intEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK-NOT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -202,7 +207,7 @@ subroutine mult_lastprivate_int2(arg1, arg2) subroutine firstpriv_lastpriv_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -223,7 +228,8 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) !CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref !CHECK-NEXT: hlfir.assign %[[FPV_LD]] to %[[CLONE1_DECL]]#0 temporary_lhs : i32, !fir.ref !CHECK-NEXT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 @@ -241,7 +247,7 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) subroutine firstpriv_lastpriv_int2(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 diff --git a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 index 8533106b7ac48..53846bef2d4f9 100644 --- a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 +++ b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 @@ -21,7 +21,8 @@ ! CHECK: %[[ONE:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[GAMA_DECL]]#0 : !fir.ref ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[PRIV_I_DECL]]#1 : !fir.ref ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! 
CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index diff --git a/flang/test/Lower/OpenMP/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/parallel-private-clause.f90 index 5578b6710da7c..e870d8f8f0669 100644 --- a/flang/test/Lower/OpenMP/parallel-private-clause.f90 +++ b/flang/test/Lower/OpenMP/parallel-private-clause.f90 @@ -304,7 +304,8 @@ subroutine simple_loop_1 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV_DECL]]#1 : !fir.ref @@ -342,7 +343,8 @@ subroutine simple_loop_2 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV_DECL]]#1 : !fir.ref @@ -379,7 +381,8 @@ subroutine simple_loop_3 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV_DECL:.*]]#1 : !fir.ref diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 index 716a7d71bb628..9eb05978b2d4a 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 @@ -3,7 +3,7 @@ ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s -! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) +! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) subroutine omp_do_firstprivate(a) ! CHECK: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_do_firstprivateEa"} : (!fir.ref) -> (!fir.ref, !fir.ref) integer::a @@ -20,7 +20,8 @@ subroutine omp_do_firstprivate(a) ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK-NEXT: omp.wsloop for (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: omp.wsloop { + ! CHECK-NEXT: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK-NEXT: fir.store %[[ARG1]] to %[[I_PVT_DECL]]#1 : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK-NEXT: omp.yield @@ -32,7 +33,7 @@ subroutine omp_do_firstprivate(a) call bar(a) end subroutine omp_do_firstprivate -! CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) +! 
CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) subroutine omp_do_firstprivate2(a, n) ! CHECK: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_do_firstprivate2Ea"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFomp_do_firstprivate2En"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -56,7 +57,8 @@ subroutine omp_do_firstprivate2(a, n) ! CHECK: %[[LB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref ! CHECK: %[[UB:.*]] = fir.load %[[N_PVT_DECL]]#0 : !fir.ref ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK: fir.store %[[ARG2]] to %[[I_PVT_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/parallel-wsloop.f90 index c06f941b74b58..fcb2791554ab0 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop.f90 @@ -9,7 +9,8 @@ subroutine simple_parallel_do ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -37,7 +38,8 @@ subroutine parallel_do_with_parallel_clauses(cond, nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -61,7 +63,8 @@ subroutine parallel_do_with_clauses(nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop schedule(dynamic) { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -97,7 +100,8 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt) do i=1, 9 ! 
CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -150,7 +154,8 @@ end subroutine parallel_private_do ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -196,7 +201,8 @@ end subroutine omp_parallel_multiple_firstprivate_do ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield @@ -241,7 +247,8 @@ end subroutine parallel_do_private ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_PRIV_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -287,7 +294,8 @@ end subroutine omp_parallel_do_multiple_firstprivate ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 b/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 index fdbabc21b2c9e..fd80c563a8f4f 100644 --- a/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 +++ b/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 @@ -82,7 +82,8 @@ subroutine test_stop_in_region3() ! CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[VAL_0_DECL]]#1 : !fir.ref ! CHECK: cf.br ^bb1 ! 
CHECK: ^bb1: diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90 index 43598aff08bfe..e1c2a1f0966f5 100644 --- a/flang/test/Lower/OpenMP/target.f90 +++ b/flang/test/Lower/OpenMP/target.f90 @@ -594,7 +594,8 @@ subroutine omp_target_parallel_do !$omp target parallel do map(tofrom: a) !CHECK: %[[I_PVT_ALLOCA:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_ALLOCA]] {uniq_name = "_QFomp_target_parallel_doEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) - !CHECK: omp.wsloop for (%[[I_VAL:.*]]) : i32 + !CHECK: omp.wsloop { + !CHECK: omp.loopnest (%[[I_VAL:.*]]) : i32 do i = 1, 1024 !CHECK: fir.store %[[I_VAL]] to %[[I_PVT_DECL]]#1 : !fir.ref !CHECK: %[[C10:.*]] = arith.constant 10 : i32 diff --git a/flang/test/Lower/OpenMP/unstructured.f90 b/flang/test/Lower/OpenMP/unstructured.f90 index e5bf980ce90fd..13d3c2bf04206 100644 --- a/flang/test/Lower/OpenMP/unstructured.f90 +++ b/flang/test/Lower/OpenMP/unstructured.f90 @@ -70,14 +70,16 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct ! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 ! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 ! CHECK: ^bb2: // pred: ^bb1 -! CHECK: omp.wsloop for (%[[ARG1:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG1]] to %[[OMP_LOOP_K_DECL]]#1 : !fir.ref ! CHECK: @_FortranAioBeginExternalListOutput ! CHECK: %[[LOAD_1:.*]] = fir.load %[[OMP_LOOP_K_DECL]]#0 : !fir.ref ! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]]) ! CHECK: omp.yield ! CHECK: } -! CHECK: omp.wsloop for (%[[ARG2:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG2]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref ! CHECK: br ^bb1 ! CHECK: ^bb2: // 2 preds: ^bb1, ^bb5 @@ -121,7 +123,8 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs ! CHECK: omp.parallel { ! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: %[[OMP_LOOP_J_DECL:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFss4Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: omp.wsloop for (%[[ARG:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref ! CHECK: %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}} ! CHECK: %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}} diff --git a/flang/test/Lower/OpenMP/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/wsloop-chunks.f90 index 5016c8985bda0..2fe787150de32 100644 --- a/flang/test/Lower/OpenMP/wsloop-chunks.f90 +++ b/flang/test/Lower/OpenMP/wsloop-chunks.f90 @@ -20,7 +20,8 @@ program wsloop ! CHECK: %[[VAL_3:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait for (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV:.*]]#1 : !fir.ref ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]]#0 : !fir.ref ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 @@ -38,7 +39,8 @@ program wsloop ! CHECK: %[[VAL_15:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! 
CHECK: %[[VAL_17:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait for (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref ! CHECK: %[[VAL_24:.*]] = arith.constant 2 : i32 ! CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]]#0 : !fir.ref @@ -46,7 +48,7 @@ program wsloop ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: omp.yield ! CHECK: } - + end do !$OMP END DO NOWAIT chunk = 6 @@ -62,7 +64,8 @@ program wsloop ! CHECK: %[[VAL_30:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_31:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_0]]#0 : !fir.ref -! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait for (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { ! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]]#1 : !fir.ref ! CHECK: %[[VAL_39:.*]] = arith.constant 3 : i32 ! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/wsloop-collapse.f90 index c93fcf4ef968d..85e1134e68720 100644 --- a/flang/test/Lower/OpenMP/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/wsloop-collapse.f90 @@ -49,7 +49,8 @@ program wsloop_collapse !CHECK: %[[VAL_30:.*]] = arith.constant 1 : i32 !CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref !CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) { !$omp do collapse(3) do i = 1, a do j= 1, b diff --git a/flang/test/Lower/OpenMP/wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/wsloop-monotonic.f90 index fba9105b98181..5d0cfb337bb6e 100644 --- a/flang/test/Lower/OpenMP/wsloop-monotonic.f90 +++ b/flang/test/Lower/OpenMP/wsloop-monotonic.f90 @@ -15,7 +15,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]]#1 : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 index 1bd7a2edc0f52..024c4ebf433fb 100644 --- a/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 +++ b/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 @@ -17,7 +17,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : 
i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV]]#1 : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/wsloop-ordered.f90 b/flang/test/Lower/OpenMP/wsloop-ordered.f90 index 5185d2d085bac..5483b9228e035 100644 --- a/flang/test/Lower/OpenMP/wsloop-ordered.f90 +++ b/flang/test/Lower/OpenMP/wsloop-ordered.f90 @@ -6,7 +6,8 @@ subroutine wsloop_ordered_no_para() integer :: a(10), i -! CHECK: omp.wsloop ordered(0) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(0) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } @@ -25,7 +26,8 @@ subroutine wsloop_ordered_with_para() integer :: a(10), i ! CHECK: func @_QPwsloop_ordered_with_para() { -! CHECK: omp.wsloop ordered(1) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(1) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 index 4d30282fc8c21..bd9f68ecc7e4b 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 @@ -27,7 +27,8 @@ ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 index 7df4f37b98df8..a8f4e50f6a51c 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 @@ -68,7 +68,8 @@ ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -108,7 +109,8 @@ subroutine simple_int_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -149,7 +151,8 @@ subroutine simple_real_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -188,7 +191,8 @@ subroutine simple_int_reduction_switch_order ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -236,7 +240,8 @@ subroutine simple_real_reduction_switch_order ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! 
CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -297,7 +302,8 @@ subroutine multiple_int_reductions_same_type ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -365,7 +371,8 @@ subroutine multiple_real_reductions_same_type ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_17:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) for (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { ! CHECK: fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref ! CHECK: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 index 9588531f6c909..6af928f6c9fb9 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 @@ -29,7 +29,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@iand_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@iand_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! 
CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 index a14a37101874c..8a7d9366cf387 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 @@ -19,7 +19,8 @@ !CHECK: omp.parallel !CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_ieorEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref !CHECK: %[[PRV_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 index 3b5e327439358..8f28916724267 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 @@ -29,7 +29,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@ior_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) +! CHECK: omp.wsloop reduction(@ior_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 index 17d321620cca8..4303f15990ecc 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -84,7 +85,8 @@ end subroutine simple_reduction ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! 
CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -141,7 +143,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 index 8204e4c878cb0..994477f4edcec 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -83,7 +84,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! 
CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -140,7 +142,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 index 623368a50e864..29849b68be572 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -84,7 +85,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -143,7 +145,8 @@ subroutine simple_reduction_switch_order(y) ! 
CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 index f1ae1bc687cd5..719677ca729f2 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -83,7 +84,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -140,7 +142,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! 
CHECK: omp.wsloop reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 index ed25cedae90c6..45ae04f3712ac 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 @@ -29,7 +29,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 index ea3b1bebce038..09856f66b1e2c 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 @@ -40,7 +40,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -69,7 +70,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -88,7 +90,8 @@ ! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_33:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_34:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) for (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { +! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { ! CHECK: fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref ! CHECK: %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 index 3aa9001869dc5..eaac48b9e1b5a 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 @@ -40,7 +40,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -69,7 +70,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -90,7 +92,8 @@ ! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_33:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_34:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) for (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { +! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { ! CHECK: fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref ! CHECK: %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 index 4774fba3f33e9..4656960a45ba2 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 @@ -61,7 +61,8 @@ ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -97,7 +98,8 @@ subroutine simple_int_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -134,7 +136,8 @@ subroutine simple_real_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -170,7 +173,8 @@ subroutine simple_int_reduction_switch_order ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! 
CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -215,7 +219,8 @@ subroutine simple_real_reduction_switch_order ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -273,7 +278,8 @@ subroutine multiple_int_reductions_same_type ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -338,7 +344,8 @@ subroutine multiple_real_reductions_same_type ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_17:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @multiply_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @multiply_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) for (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { +! 
CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @multiply_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @multiply_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { ! CHECK: fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref ! CHECK: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Lower/OpenMP/wsloop-simd.f90 b/flang/test/Lower/OpenMP/wsloop-simd.f90 index c3d5e3e0cda59..490f1665a7b1b 100644 --- a/flang/test/Lower/OpenMP/wsloop-simd.f90 +++ b/flang/test/Lower/OpenMP/wsloop-simd.f90 @@ -14,7 +14,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(runtime, simd) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(runtime, simd) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[STORE:.*]]#1 : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/wsloop-unstructured.f90 b/flang/test/Lower/OpenMP/wsloop-unstructured.f90 index 7fe63a1fe607c..f5c1a8126edc1 100644 --- a/flang/test/Lower/OpenMP/wsloop-unstructured.f90 +++ b/flang/test/Lower/OpenMP/wsloop-unstructured.f90 @@ -29,7 +29,8 @@ end subroutine sub ! CHECK-SAME: %[[VAL_2:.*]]: !fir.ref> {fir.bindc_name = "x"}, ! CHECK-SAME: %[[VAL_3:.*]]: !fir.ref> {fir.bindc_name = "y"}) { ! [...] -! CHECK: omp.wsloop for (%[[VAL_53:.*]], %[[VAL_54:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_53:.*]], %[[VAL_54:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) { ! [...] ! CHECK: cf.br ^bb1 ! 
CHECK: ^bb1: diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90 index b3758f1fdc00f..e2a71e3d1b888 100644 --- a/flang/test/Lower/OpenMP/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/wsloop-variable.f90 @@ -22,7 +22,8 @@ program wsloop_variable !CHECK: %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64 !CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64 !CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64 -!CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]]#1 : !fir.ref !CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref @@ -46,7 +47,8 @@ program wsloop_variable !CHECK: %[[TMP12:.*]] = arith.constant 1 : i32 !CHECK: %[[TMP13:.*]] = fir.convert %{{.*}} : (i8) -> i32 !CHECK: %[[TMP14:.*]] = fir.convert %{{.*}} : (i64) -> i32 -!CHECK: omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE3:.*]]#1 : !fir.ref !CHECK: %[[LOAD3:.*]] = fir.load %[[STORE3]]#0 : !fir.ref @@ -64,7 +66,8 @@ program wsloop_variable !CHECK: %[[TMP17:.*]] = fir.convert %{{.*}} : (i8) -> i64 !CHECK: %[[TMP18:.*]] = fir.convert %{{.*}} : (i16) -> i64 !CHECK: %[[TMP19:.*]] = fir.convert %{{.*}} : (i32) -> i64 -!CHECK: omp.wsloop for (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) { !CHECK: %[[ARG1_I128:.*]] = fir.convert %[[ARG1]] : (i64) -> i128 !CHECK: fir.store %[[ARG1_I128]] to %[[STORE4:.*]]#1 : !fir.ref !CHECK: %[[LOAD4:.*]] = fir.load %[[STORE4]]#0 : !fir.ref @@ -118,7 +121,8 @@ subroutine wsloop_variable_sub !CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref !CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_23]] : (i8) -> i32 !CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_24]] : (i16) -> i32 -!CHECK: omp.wsloop for (%[[VAL_27:.*]]) : i32 = (%[[VAL_22]]) to (%[[VAL_25]]) inclusive step (%[[VAL_26]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[VAL_27:.*]]) : i32 = (%[[VAL_22]]) to (%[[VAL_25]]) inclusive step (%[[VAL_26]]) { !CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> i16 !CHECK: fir.store %[[VAL_28]] to %[[VAL_3]]#1 : !fir.ref !CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref @@ -160,7 +164,8 @@ subroutine wsloop_variable_sub !CHECK: %[[VAL_50:.*]] = arith.constant 1 : i32 !CHECK: %[[VAL_51:.*]] = arith.constant 10 : i32 !CHECK: %[[VAL_52:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[VAL_53:.*]]) : i32 = (%[[VAL_50]]) to (%[[VAL_51]]) inclusive step (%[[VAL_52]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[VAL_53:.*]]) : i32 = (%[[VAL_50]]) to (%[[VAL_51]]) inclusive step (%[[VAL_52]]) { !CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_53]] : (i32) -> i8 !CHECK: fir.store %[[VAL_54]] to %[[VAL_1]]#1 : !fir.ref !CHECK: %[[VAL_55:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref diff --git 
a/flang/test/Lower/OpenMP/wsloop.f90 b/flang/test/Lower/OpenMP/wsloop.f90 index 4068f715c3e18..71cd0ddc2763b 100644 --- a/flang/test/Lower/OpenMP/wsloop.f90 +++ b/flang/test/Lower/OpenMP/wsloop.f90 @@ -12,7 +12,8 @@ subroutine simple_loop ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) { !$OMP DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_DECL:.*]]#1 : !fir.ref @@ -36,7 +37,8 @@ subroutine simple_loop_with_step ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 2 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) { ! CHECK: fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref !$OMP DO @@ -60,7 +62,8 @@ subroutine loop_with_schedule_nowait ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop schedule(runtime) nowait { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) { !$OMP DO SCHEDULE(runtime) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref diff --git a/flang/test/Transforms/DoConcurrent/basic.f90 b/flang/test/Transforms/DoConcurrent/basic.f90 index a555a25c9bad5..248223d72ff11 100644 --- a/flang/test/Transforms/DoConcurrent/basic.f90 +++ b/flang/test/Transforms/DoConcurrent/basic.f90 @@ -23,7 +23,8 @@ program do_concurrent_basic ! CHECK: %[[UB:.*]] = fir.convert %[[C10]] : (i32) -> index ! CHECK: %[[STEP:.*]] = arith.constant 1 : index - ! CHECK: omp.wsloop for (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { ! CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32 ! CHECK-NEXT: fir.store %[[IV_IDX]] to %[[BINDING]]#1 : !fir.ref ! 
CHECK-NEXT: %[[IV_VAL1:.*]] = fir.load %[[BINDING]]#0 : !fir.ref diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index cf3bc9683cc89..7e6a590d10bb0 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -1204,7 +1204,7 @@ def TaskGroupOp : OpenMP_Op<"taskgroup", [AttrSizedOperandSegments, // 2.10.4 taskyield Construct //===----------------------------------------------------------------------===// -def TaskyieldOp : OpenMP_Op<"taskyield"> { +def TaskYieldOp : OpenMP_Op<"taskyield"> { let summary = "taskyield construct"; let description = [{ The taskyield construct specifies that the current task can be suspended @@ -1723,6 +1723,10 @@ def TargetOp : OpenMP_Op<"target",[IsolatedFromAbove, MapClauseOwningOpInterface TODO: is_device_ptr, defaultmap, in_reduction }]; + // TODO Remove num_teams_lower, num_teams_upper, teams_thread_limit and + // num_threads args and instead calculate them outside and implicitly map + // them. If not used and implicitly mapped, they can be omitted from the + // outlined function arg list. let arguments = (ins Optional:$if_expr, Optional:$device, Optional:$thread_limit, @@ -1914,7 +1918,7 @@ def OrderedRegionOp : OpenMP_Op<"ordered_region"> { // 2.17.5 taskwait Construct //===----------------------------------------------------------------------===// -def TaskwaitOp : OpenMP_Op<"taskwait"> { +def TaskWaitOp : OpenMP_Op<"taskwait"> { let summary = "taskwait construct"; let description = [{ The taskwait construct specifies a wait on the completion of child tasks diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp index 83eb1653ca950..117db5c39db59 100644 --- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp +++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp @@ -319,8 +319,8 @@ void ConvertOpenMPToLLVMPass::runOnOperation() { populateOpenMPToLLVMConversionPatterns(converter, patterns); LLVMConversionTarget target(getContext()); - target.addLegalOp(); + target.addLegalOp(); configureOpenMPToLLVMConversionLegality(target, converter); if (failed(applyPartialConversion(module, target, std::move(patterns)))) signalPassFailure(); diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index 464a647564ace..7bebd874f47b4 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -461,7 +461,11 @@ struct ParallelOpLowering : public OpRewritePattern { // Replace the loop. { OpBuilder::InsertionGuard allocaGuard(rewriter); - auto loop = rewriter.create( + // TODO Test that this didn't break something. 
+ auto wsloop = rewriter.create(parallelOp.getLoc()); + rewriter.createBlock(&wsloop.getRegion()); + + auto loop = rewriter.create( parallelOp.getLoc(), parallelOp.getLowerBound(), parallelOp.getUpperBound(), parallelOp.getStep()); rewriter.create(loc); @@ -482,9 +486,9 @@ struct ParallelOpLowering : public OpRewritePattern { rewriter.setInsertionPointToEnd(&*scope.getBodyRegion().begin()); rewriter.create(loc, ValueRange()); if (!reductionVariables.empty()) { - loop.setReductionsAttr( + wsloop.setReductionsAttr( ArrayAttr::get(rewriter.getContext(), reductionDeclSymbols)); - loop.getReductionVarsMutable().append(reductionVariables); + wsloop.getReductionVarsMutable().append(reductionVariables); } } } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 3605e03ae886e..7e5e3c47e0d03 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -893,31 +893,32 @@ static LogicalResult convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - auto loop = cast(opInst); + auto wsloop = cast(opInst); + auto loop = cast(wsloop.getWrappedLoop()); // TODO: this should be in the op verifier instead. if (loop.getLowerBound().empty()) return failure(); // Static is the default. auto schedule = - loop.getScheduleVal().value_or(omp::ClauseScheduleKind::Static); + wsloop.getScheduleVal().value_or(omp::ClauseScheduleKind::Static); // Find the loop configuration. llvm::Value *step = moduleTranslation.lookupValue(loop.getStep()[0]); llvm::Type *ivType = step->getType(); llvm::Value *chunk = nullptr; - if (loop.getScheduleChunkVar()) { + if (wsloop.getScheduleChunkVar()) { llvm::Value *chunkVar = - moduleTranslation.lookupValue(loop.getScheduleChunkVar()); + moduleTranslation.lookupValue(wsloop.getScheduleChunkVar()); chunk = builder.CreateSExtOrTrunc(chunkVar, ivType); } SmallVector reductionDecls; - collectReductionDecls(loop, reductionDecls); + collectReductionDecls(wsloop, reductionDecls); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); DenseMap reductionVariableMap; - allocReductionVars(loop, builder, *ompBuilder, moduleTranslation, allocaIP, + allocReductionVars(wsloop, builder, *ompBuilder, moduleTranslation, allocaIP, reductionDecls, reductionVariableMap); // Store the mapping between reduction variables and their private copies on @@ -929,7 +930,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, // Before the loop, store the initial values of reductions into reduction // variables.
Although this could be done after allocas, we don't want to mess // up with the alloca insertion point. - for (unsigned i = 0; i < loop.getNumReductionVars(); ++i) { + for (unsigned i = 0; i < wsloop.getNumReductionVars(); ++i) { SmallVector phis; if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral", builder, @@ -1020,10 +1021,10 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, allocaIP = findAllocaInsertPoint(builder, moduleTranslation); // TODO: Handle doacross loops when the ordered clause has a parameter. - bool isOrdered = loop.getOrderedVal().has_value(); + bool isOrdered = wsloop.getOrderedVal().has_value(); std::optional scheduleModifier = - loop.getScheduleModifier(); - bool isSimd = loop.getSimdModifier(); + wsloop.getScheduleModifier(); + bool isSimd = wsloop.getSimdModifier(); bool distributeCodeGen = opInst.getParentOfType(); bool parallelCodeGen = opInst.getParentOfType(); @@ -1036,7 +1037,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, workshareLoopType = llvm::omp::WorksharingLoopType::ForStaticLoop; } ompBuilder->applyWorkshareLoop( - ompLoc.DL, loopInfo, allocaIP, !loop.getNowait(), + ompLoc.DL, loopInfo, allocaIP, !wsloop.getNowait(), convertToScheduleKind(schedule), chunk, isSimd, scheduleModifier == omp::ScheduleModifier::monotonic, scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered, @@ -1049,12 +1050,12 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, builder.restoreIP(afterIP); // Process the reductions if required. - if (loop.getNumReductionVars() == 0) + if (wsloop.getNumReductionVars() == 0) return success(); // Create the reduction generators. We need to own them here because // ReductionInfo only accepts references to the generators. - collectReductionInfo(loop, builder, *ompBuilder, moduleTranslation, + collectReductionInfo(wsloop, builder, *ompBuilder, moduleTranslation, reductionDecls); // The call to createReductions below expects the block to have a // terminator. 
Create an unreachable instruction to serve as terminator @@ -1063,12 +1064,12 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, builder.SetInsertPoint(tempTerminator); llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = - ompBuilder->createReductions(builder.saveIP(), allocaIP, - ompBuilder->RIManager.getReductionInfos(), - loop.getNowait(), /*IsTeamsReduction*/ false, - /*HasDistribute*/ distributeCodeGen); + ompBuilder->createReductions( + builder.saveIP(), allocaIP, ompBuilder->RIManager.getReductionInfos(), + wsloop.getNowait(), /*IsTeamsReduction*/ false, + /*HasDistribute*/ distributeCodeGen); if (!contInsertPoint.getBlock()) - return loop->emitOpError() << "failed to convert reductions"; + return wsloop->emitOpError() << "failed to convert reductions"; auto nextInsertionPoint = ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); tempTerminator->eraseFromParent(); @@ -1331,7 +1332,8 @@ convertOmpParallel(Operation &opInst1, llvm::IRBuilderBase &builder, static LogicalResult convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { - auto loop = cast(opInst); + auto simd = cast(opInst); + auto loop = cast(simd.getWrappedLoop()); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); @@ -1410,17 +1412,17 @@ convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); llvm::ConstantInt *simdlen = nullptr; - if (std::optional simdlenVar = loop.getSimdlen()) + if (std::optional simdlenVar = simd.getSimdlen()) simdlen = builder.getInt64(simdlenVar.value()); llvm::ConstantInt *safelen = nullptr; - if (std::optional safelenVar = loop.getSafelen()) + if (std::optional safelenVar = simd.getSafelen()) safelen = builder.getInt64(safelenVar.value()); llvm::MapVector alignedVars; ompBuilder->applySimd( loopInfo, alignedVars, - loop.getIfExpr() ? moduleTranslation.lookupValue(loop.getIfExpr()) + simd.getIfExpr() ? 
moduleTranslation.lookupValue(simd.getIfExpr()) : nullptr, llvm::omp::OrderKind::OMP_ORDER_unknown, simdlen, safelen); @@ -3320,11 +3322,11 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); return success(); }) - .Case([&](omp::TaskwaitOp) { + .Case([&](omp::TaskWaitOp) { ompBuilder->createTaskwait(builder.saveIP()); return success(); }) - .Case([&](omp::TaskyieldOp) { + .Case([&](omp::TaskYieldOp) { ompBuilder->createTaskyield(builder.saveIP()); return success(); }) diff --git a/mlir/test/CAPI/execution_engine.c b/mlir/test/CAPI/execution_engine.c index 38a8fb8c3e213..a3bf38bdfb131 100644 --- a/mlir/test/CAPI/execution_engine.c +++ b/mlir/test/CAPI/execution_engine.c @@ -99,10 +99,13 @@ void testOmpCreation(void) { " %1 = arith.constant 1 : i32 \n" " %2 = arith.constant 2 : i32 \n" " omp.parallel { \n" -" omp.wsloop for (%3) : i32 = (%0) to (%2) step (%1) { \n" +" omp.wsloop { \n" +" omp.loopnest (%3) : i32 = (%0) to (%2) step (%1) { \n" " omp.yield \n" " } \n" " omp.terminator \n" +" } \n" +" omp.terminator \n" " } \n" " llvm.return \n" " } \n" diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir index 6cbc0c8f4be9a..de5dfc1a288d0 100644 --- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir @@ -71,7 +71,8 @@ func.func @branch_loop() { func.func @wsloop(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) { // CHECK: omp.parallel omp.parallel { - // CHECK: omp.wsloop for (%[[ARG6:.*]], %[[ARG7:.*]]) : i64 = (%[[ARG0]], %[[ARG1]]) to (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[ARG5]]) { + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[ARG6:.*]], %[[ARG7:.*]]) : i64 = (%[[ARG0]], %[[ARG1]]) to (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[ARG5]]) { "omp.wsloop"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) ({ ^bb0(%arg6: index, %arg7: index): // CHECK-DAG: %[[CAST_ARG6:.*]] = builtin.unrealized_conversion_cast %[[ARG6]] : i64 to index @@ -320,7 +321,8 @@ llvm.func @_QPsb() { // CHECK-LABEL: @_QPsimple_reduction // CHECK: %[[RED_ACCUMULATOR:.*]] = llvm.alloca %{{.*}} x i32 {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} : (i64) -> !llvm.ptr // CHECK: omp.parallel -// CHECK: omp.wsloop reduction(@eqv_reduction %{{.+}} -> %[[PRV:.+]] : !llvm.ptr) for +// CHECK: omp.wsloop reduction(@eqv_reduction %{{.+}} -> %[[PRV:.+]] : !llvm.ptr) { +// CHECK: omp.loopnest // CHECK: %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> i32 // CHECK: %[[CMP:.+]] = llvm.icmp "eq" %{{.*}}, %[[LPRV]] : i32 // CHECK: %[[ZEXT:.+]] = llvm.zext %[[CMP]] : i1 to i32 @@ -353,7 +355,8 @@ llvm.func @_QPsimple_reduction(%arg0: !llvm.ptr {fir.bindc_name = "y"}) { llvm.store %5, %4 : i32, !llvm.ptr omp.parallel { %6 = llvm.alloca %3 x i32 {adapt.valuebyref, in_type = i32, operandSegmentSizes = array, pinned} : (i64) -> !llvm.ptr - omp.wsloop reduction(@eqv_reduction %4 -> %prv : !llvm.ptr) for (%arg1) : i32 = (%1) to (%0) inclusive step (%1) { + omp.wsloop reduction(@eqv_reduction %4 -> %prv : !llvm.ptr) { + omp.loopnest (%arg1) : i32 = (%1) to (%0) inclusive step (%1) { llvm.store %arg1, %6 : i32, !llvm.ptr %7 = llvm.load %6 : !llvm.ptr -> i32 %8 = llvm.sext %7 : i32 to i64 @@ -367,6 +370,8 @@ llvm.func @_QPsimple_reduction(%arg0: !llvm.ptr {fir.bindc_name = "y"}) { omp.yield } omp.terminator + } + omp.terminator } llvm.return } diff --git 
a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir index acd2690c56e2e..d1e515b8ae813 100644 --- a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir +++ b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir @@ -5,7 +5,8 @@ func.func @parallel(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) { // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { - // CHECK: omp.wsloop for (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { // CHECK: memref.alloca_scope scf.parallel (%i, %j) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { // CHECK: "test.payload"(%[[LVAR1]], %[[LVAR2]]) : (index, index) -> () @@ -23,8 +24,9 @@ func.func @nested_loops(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) { // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { - // CHECK: omp.wsloop for (%[[LVAR_OUT1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { - // CHECK: memref.alloca_scope + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR_OUT1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { + // CHECK: memref.alloca_scope scf.parallel (%i) = (%arg0) to (%arg2) step (%arg4) { // CHECK: omp.parallel // CHECK: omp.wsloop for (%[[LVAR_IN1:.*]]) : index = (%arg1) to (%arg3) step (%arg5) { @@ -47,7 +49,8 @@ func.func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) { // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { - // CHECK: omp.wsloop for (%[[LVAR_AL1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR_AL1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { // CHECK: memref.alloca_scope scf.parallel (%i) = (%arg0) to (%arg2) step (%arg4) { // CHECK: "test.payload1"(%[[LVAR_AL1]]) : (index) -> () @@ -60,7 +63,8 @@ func.func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index, // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { - // CHECK: omp.wsloop for (%[[LVAR_AL2:.*]]) : index = (%arg1) to (%arg3) step (%arg5) { + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR_AL2:.*]]) : index = (%arg1) to (%arg3) step (%arg5) { // CHECK: memref.alloca_scope scf.parallel (%j) = (%arg1) to (%arg3) step (%arg5) { // CHECK: "test.payload2"(%[[LVAR_AL2]]) : (index) -> () diff --git a/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir b/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir index 37720e98d92a9..f48f8152ee022 100644 --- a/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir +++ b/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir @@ -32,7 +32,8 @@ llvm.func @repeated_successor_no_args(%arg0: i1) { // CHECK: @repeated_successor_openmp llvm.func @repeated_successor_openmp(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: i1) { - omp.wsloop for (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) { + omp.wsloop { + omp.loopnest (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) { // CHECK: llvm.cond_br %{{.*}}, ^[[BB1:.*]]({{.*}}), ^[[BB2:.*]]({{.*}}) llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64) // CHECK: ^[[BB1]] @@ -41,6 +42,8 @@ llvm.func 
@repeated_successor_openmp(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: // CHECK: ^[[BB2]](%[[ARG:.*]]: i64): // CHECK: llvm.br ^[[BB1]](%[[ARG]] : i64) } + omp.terminator + } llvm.return } diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 448f37b32fff6..8a8ec498ed76b 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -98,7 +98,7 @@ func.func @invalid_parent(%lb : index, %ub : index, %step : index) { func.func @type_mismatch(%lb : index, %ub : index, %step : index) { // TODO Remove induction variables from omp.wsloop. - omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop { // expected-error@+1 {{range argument type does not match corresponding IV type}} "omp.loopnest" (%lb, %ub, %step) ({ ^bb0(%iv2: i32): @@ -112,7 +112,7 @@ func.func @type_mismatch(%lb : index, %ub : index, %step : index) { func.func @iv_number_mismatch(%lb : index, %ub : index, %step : index) { // TODO Remove induction variables from omp.wsloop. - omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop { // expected-error@+1 {{number of range arguments and IVs do not match}} "omp.loopnest" (%lb, %ub, %step) ({ ^bb0(%iv1 : index, %iv2 : index): @@ -125,7 +125,7 @@ func.func @iv_number_mismatch(%lb : index, %ub : index, %step : index) { // ----- func.func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) { - // expected-error @below {{expected 'for'}} + // expected-error @below {{expected '{' to begin a region}} omp.wsloop nowait inclusive for (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield @@ -145,7 +145,7 @@ func.func @order_value(%lb : index, %ub : index, %step : index) { // ----- func.func @if_not_allowed(%lb : index, %ub : index, %step : index, %bool_var : i1) { - // expected-error @below {{expected 'for'}} + // expected-error @below {{expected '{'}} omp.wsloop if(%bool_var: i1) for (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield @@ -155,7 +155,7 @@ func.func @if_not_allowed(%lb : index, %ub : index, %step : index, %bool_var : i // ----- func.func @num_threads_not_allowed(%lb : index, %ub : index, %step : index, %int_var : i32) { - // expected-error @below {{expected 'for'}} + // expected-error @below {{expected '{'}} omp.wsloop num_threads(%int_var: i32) for (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield @@ -479,12 +479,14 @@ func.func @foo(%lb : index, %ub : index, %step : index) { %1 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr // expected-error @below {{expected symbol reference @foo to point to a reduction declaration}} - omp.wsloop reduction(@foo %0 -> %prv : !llvm.ptr) - for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop reduction(@foo %0 -> %prv : !llvm.ptr) { + omp.loopnest (%iv) : index = (%lb) to (%ub) step (%step) { %2 = arith.constant 2.0 : f32 omp.reduction %2, %1 : f32, !llvm.ptr omp.yield } + omp.terminator + } return } @@ -507,12 +509,14 @@ func.func @foo(%lb : index, %ub : index, %step : index) { %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr // expected-error @below {{accumulator variable used more than once}} - omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr, @add_f32 %0 -> %prv1 : !llvm.ptr) - for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr, @add_f32 %0 -> %prv1 : !llvm.ptr) { + omp.loopnest (%iv) : index = (%lb) to (%ub) step (%step) { %2 = arith.constant 2.0 : f32 omp.reduction %2, %0 : f32, !llvm.ptr omp.yield } + omp.terminator + } return } @@ -540,12 
+544,14 @@ func.func @foo(%lb : index, %ub : index, %step : index, %mem : memref<1xf32>) { %c1 = arith.constant 1 : i32 // expected-error @below {{expected accumulator ('memref<1xf32>') to be the same type as reduction declaration ('!llvm.ptr')}} - omp.wsloop reduction(@add_f32 %mem -> %prv : memref<1xf32>) - for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop reduction(@add_f32 %mem -> %prv : memref<1xf32>) { + omp.loopnest (%iv) : index = (%lb) to (%ub) step (%step) { %2 = arith.constant 2.0 : f32 omp.reduction %2, %mem : f32, memref<1xf32> omp.yield } + omp.terminator + } return } @@ -577,27 +583,32 @@ omp.critical.declare @mutex hint(invalid_hint) // ----- func.func @omp_ordered1(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () { - omp.wsloop ordered(1) - for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { + omp.wsloop ordered(1) { + omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}} omp.ordered_region { omp.terminator } omp.yield } + omp.terminator + } return } // ----- func.func @omp_ordered2(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () { - omp.wsloop for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { + omp.wsloop { + omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}} omp.ordered_region { omp.terminator } omp.yield } + omp.terminator + } return } @@ -612,25 +623,29 @@ func.func @omp_ordered3(%vec0 : i64) -> () { // ----- func.func @omp_ordered4(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64) -> () { - omp.wsloop ordered(0) - for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { + omp.wsloop ordered(0) { + omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { // expected-error @below {{ordered depend directive must be closely nested inside a worksharing-loop with ordered clause with parameter present}} omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64} omp.yield } + omp.terminator + } return } // ----- func.func @omp_ordered5(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64, %vec1 : i64) -> () { - omp.wsloop ordered(1) - for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { + omp.wsloop ordered(1) { + omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { // expected-error @below {{number of variables in depend clause does not match number of iteration variables in the doacross loop}} omp.ordered depend_type(dependsource) depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64} omp.yield } + omp.terminator + } return } @@ -1462,12 +1477,14 @@ func.func @omp_cancel2() { // ----- func.func @omp_cancel3(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () { - omp.wsloop nowait - for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { + omp.wsloop nowait { + omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { // expected-error @below {{A worksharing construct that is canceled must not have a nowait clause}} omp.cancel cancellation_construct_type(loop) // CHECK: omp.terminator omp.terminator + } + omp.terminator } return } @@ -1475,12 +1492,14 @@ func.func @omp_cancel3(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () { // ----- func.func @omp_cancel4(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () { - omp.wsloop ordered(1) - for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { + omp.wsloop ordered(1) { + omp.loopnest (%0) : i32 = (%arg1) 
to (%arg2) step (%arg3) {
       // expected-error @below {{A worksharing construct that is canceled must not have an ordered clause}}
       omp.cancel cancellation_construct_type(loop)
       // CHECK: omp.terminator
       omp.terminator
+    }
+    omp.terminator
   }
   return
 }
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
index afbf5f2224630..d1758035b8956 100644
--- a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
@@ -12,10 +12,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
     %loop_ub = llvm.mlir.constant(9 : i32) : i32
     %loop_lb = llvm.mlir.constant(0 : i32) : i32
     %loop_step = llvm.mlir.constant(1 : i32) : i32
-    omp.wsloop for (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+    omp.wsloop {
+      omp.loopnest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
       %gep = llvm.getelementptr %arg0[0, %loop_cnt] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32>
       llvm.store %loop_cnt, %gep : i32, !llvm.ptr
       omp.yield
+      }
+      omp.terminator
     }
     omp.terminator
   }
diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
index 435aca32450c2..36fa2261e385c 100644
--- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
@@ -8,7 +8,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
     %loop_ub = llvm.mlir.constant(99 : i32) : i32
     %loop_lb = llvm.mlir.constant(0 : i32) : i32
     %loop_step = llvm.mlir.constant(1 : index) : i32
-    omp.wsloop for (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) {
+    omp.wsloop {
+      omp.loopnest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) {
       %1 = llvm.add %arg1, %arg2 : i32
       %2 = llvm.mul %arg2, %loop_ub overflow : i32
       %3 = llvm.add %arg1, %2 :i32
@@ -16,6 +17,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
       llvm.store %1, %4 : i32, !llvm.ptr
       omp.yield
     }
+      omp.terminator
+    }
     llvm.return
   }
 }
diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
index 4cfb7d4f69514..0f48d45c53a09 100644
--- a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
@@ -8,10 +8,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
     %loop_ub = llvm.mlir.constant(9 : i32) : i32
     %loop_lb = llvm.mlir.constant(0 : i32) : i32
     %loop_step = llvm.mlir.constant(1 : i32) : i32
-    omp.wsloop for (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+    omp.wsloop {
+      omp.loopnest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
       %gep = llvm.getelementptr %arg0[0, %loop_cnt] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32>
       llvm.store %loop_cnt, %gep : i32, !llvm.ptr
       omp.yield
+      }
+      omp.terminator
     }
     llvm.return
   }
@@ -20,8 +23,11 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
     %loop_ub = llvm.mlir.constant(9 : i32) : i32
     %loop_lb = llvm.mlir.constant(0 : i32) : i32
     %loop_step = llvm.mlir.constant(1 : i32) : i32
-    omp.wsloop for (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+    omp.wsloop {
+      omp.loopnest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
       omp.yield
+      }
+      omp.terminator
     }
     llvm.return
   }
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 3571526c078a5..a7393c47e1c5d 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -320,7 +320,8 @@ llvm.func @wsloop_simple(%arg0: !llvm.ptr) {
   %1 = llvm.mlir.constant(10 : index) : i64
   %2 = llvm.mlir.constant(1 : index) : i64
   omp.parallel {
-    "omp.wsloop"(%1, %0, %2) ({
+    omp.wsloop {
+      "omp.loopnest" (%1, %0, %2) ({
       ^bb0(%arg1: i64):
         // The form of the emitted IR is controlled by OpenMPIRBuilder and
         // tested there. Just check that the right functions are called.
@@ -334,6 +335,8 @@ llvm.func @wsloop_simple(%arg0: !llvm.ptr) {
       }) {operandSegmentSizes = array} : (i64, i64, i64) -> ()
       omp.terminator
     }
+    omp.terminator
+  }
   llvm.return
 }
@@ -345,13 +348,16 @@ llvm.func @wsloop_inclusive_1(%arg0: !llvm.ptr) {
   %1 = llvm.mlir.constant(10 : index) : i64
   %2 = llvm.mlir.constant(1 : index) : i64
   // CHECK: store i64 31, ptr %{{.*}}upperbound
-  "omp.wsloop"(%1, %0, %2) ({
+  omp.wsloop {
+    "omp.loopnest"(%1, %0, %2) ({
     ^bb0(%arg1: i64):
       %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
       %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
       llvm.store %3, %4 : f32, !llvm.ptr
       omp.yield
   }) {operandSegmentSizes = array} : (i64, i64, i64) -> ()
+    omp.terminator
+  }
   llvm.return
 }
@@ -363,13 +369,16 @@ llvm.func @wsloop_inclusive_2(%arg0: !llvm.ptr) {
   %1 = llvm.mlir.constant(10 : index) : i64
   %2 = llvm.mlir.constant(1 : index) : i64
   // CHECK: store i64 32, ptr %{{.*}}upperbound
-  "omp.wsloop"(%1, %0, %2) ({
+  omp.wsloop {
+    "omp.loopnest"(%1, %0, %2) ({
     ^bb0(%arg1: i64):
      %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
      %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
      llvm.store %3, %4 : f32, !llvm.ptr
      omp.yield
   }) {inclusive, operandSegmentSizes = array} : (i64, i64, i64) -> ()
+    omp.terminator
+  }
   llvm.return
 }
@@ -379,13 +388,15 @@ llvm.func @body(i32)
 // CHECK-LABEL: @test_omp_wsloop_static_defchunk
 llvm.func @test_omp_wsloop_static_defchunk(%lb : i32, %ub : i32, %step : i32) -> () {
-  omp.wsloop schedule(static)
-  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(static) {
+    omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
     // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 34, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 0)
     // CHECK: call void @__kmpc_for_static_fini
     llvm.call @body(%iv) : (i32) -> ()
     omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -396,13 +407,15 @@ llvm.func @body(i32)
 // CHECK-LABEL: @test_omp_wsloop_static_1
 llvm.func @test_omp_wsloop_static_1(%lb : i32, %ub : i32, %step : i32) -> () {
   %static_chunk_size = llvm.mlir.constant(1 : i32) : i32
-  omp.wsloop schedule(static = %static_chunk_size : i32)
-  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(static = %static_chunk_size : i32) {
+    omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
    // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 33, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 1)
    // CHECK: call void @__kmpc_for_static_fini
    llvm.call @body(%iv) : (i32) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -413,13 +426,15 @@ llvm.func @body(i32)
 // CHECK-LABEL: @test_omp_wsloop_static_2
 llvm.func @test_omp_wsloop_static_2(%lb : i32, %ub : i32, %step : i32) -> () {
   %static_chunk_size = llvm.mlir.constant(2 : i32) : i32
-  omp.wsloop schedule(static = %static_chunk_size : i32)
-  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(static = %static_chunk_size : i32) {
+    omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
    // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 33, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 2)
    // CHECK: call void @__kmpc_for_static_fini
    llvm.call @body(%iv) : (i32) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -428,8 +443,8 @@ llvm.func @test_omp_wsloop_static_2(%lb : i32, %ub : i32, %step : i32) -> () {
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(dynamic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -437,6 +452,8 @@ llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () {
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -446,8 +463,8 @@ llvm.func @body(i64)
 llvm.func @test_omp_wsloop_dynamic_chunk_const(%lb : i64, %ub : i64, %step : i64) -> () {
   %chunk_size_const = llvm.mlir.constant(2 : i16) : i16
-  omp.wsloop schedule(dynamic = %chunk_size_const : i16)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic = %chunk_size_const : i16) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i64 {{.*}}, i64 %{{.*}}, i64 {{.*}}, i64 2)
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -455,6 +472,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_const(%lb : i64, %ub : i64, %step : i64
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -466,8 +485,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var(%lb : i32, %ub : i32, %step : i32)
   %1 = llvm.mlir.constant(1 : i64) : i64
   %chunk_size_alloca = llvm.alloca %1 x i16 {bindc_name = "chunk_size", in_type = i16, uniq_name = "_QFsub1Echunk_size"} : (i64) -> !llvm.ptr
   %chunk_size_var = llvm.load %chunk_size_alloca : !llvm.ptr -> i16
-  omp.wsloop schedule(dynamic = %chunk_size_var : i16)
-  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic = %chunk_size_var : i16) {
+    omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
   // CHECK: %[[CHUNK_SIZE:.*]] = sext i16 %{{.*}} to i32
   // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
@@ -475,6 +494,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var(%lb : i32, %ub : i32, %step : i32)
   // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
    llvm.call @body(%iv) : (i32) -> ()
    omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -487,8 +508,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
   %1 = llvm.mlir.constant(1 : i64) : i64
   %chunk_size_alloca = llvm.alloca %1 x i64 {bindc_name = "chunk_size", in_type = i64, uniq_name = "_QFsub1Echunk_size"} : (i64) -> !llvm.ptr
   %chunk_size_var = llvm.load %chunk_size_alloca : !llvm.ptr -> i64
-  omp.wsloop schedule(dynamic = %chunk_size_var : i64)
-  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic = %chunk_size_var : i64) {
+    omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
   // CHECK: %[[CHUNK_SIZE:.*]] = trunc i64 %{{.*}} to i32
   // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
@@ -497,6 +518,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
    llvm.call @body(%iv) : (i32) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -505,8 +528,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
 llvm.func @body(i32)
 llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32, %chunk_size : i32) -> () {
-  omp.wsloop schedule(dynamic = %chunk_size : i32)
-  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic = %chunk_size : i32) {
+    omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %{{.*}})
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -514,6 +537,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32,
    llvm.call @body(%iv) : (i32) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -522,8 +547,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32,
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(auto)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(auto) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -531,6 +556,8 @@ llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -539,8 +566,8 @@ llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(runtime)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(runtime) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -548,6 +575,8 @@ llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -556,8 +585,8 @@ llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(guided)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(guided) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -565,6 +594,8 @@ llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -573,8 +604,8 @@ llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(dynamic, nonmonotonic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic, nonmonotonic) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -582,6 +613,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i6
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -590,8 +623,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i6
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(dynamic, monotonic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic, monotonic) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 536870947
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -599,6 +632,8 @@ llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64)
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -607,8 +642,8 @@ llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64)
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(runtime, simd)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(runtime, simd) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741871
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -616,6 +651,8 @@ llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> ()
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -624,8 +661,8 @@ llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> ()
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(guided, simd)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(guided, simd) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741870
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -633,6 +670,8 @@ llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> ()
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -782,8 +821,8 @@ llvm.func @simdloop_if(%arg0: !llvm.ptr {fir.bindc_name = "n"}, %arg1: !llvm.ptr
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
   // CHECK: call void @__kmpc_dispatch_fini_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -792,6 +831,8 @@ llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -800,8 +841,8 @@ llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_static_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(static) ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(static) ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
   // CHECK: call void @__kmpc_dispatch_fini_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -810,6 +851,8 @@ llvm.func @test_omp_wsloop_static_ordered(%lb : i64, %ub : i64, %step : i64) ->
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -819,8 +862,8 @@ llvm.func @body(i32)
 llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i32) -> () {
   %static_chunk_size = llvm.mlir.constant(1 : i32) : i32
-  omp.wsloop schedule(static = %static_chunk_size : i32) ordered(0)
-  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(static = %static_chunk_size : i32) ordered(0) {
+    omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 65, i32 1, i32 %{{.*}}, i32 1, i32 1)
   // CHECK: call void @__kmpc_dispatch_fini_4u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
@@ -829,6 +872,8 @@ llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i3
    llvm.call @body(%iv) : (i32) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -837,8 +882,8 @@ llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i3
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(dynamic) ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic) ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 67, i64 1, i64 %{{.*}}, i64 1, i64 1)
   // CHECK: call void @__kmpc_dispatch_fini_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -847,6 +892,8 @@ llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) ->
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -855,8 +902,8 @@ llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) ->
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(auto) ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(auto) ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 70, i64 1, i64 %{{.*}}, i64 1, i64 1)
   // CHECK: call void @__kmpc_dispatch_fini_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -865,6 +912,8 @@ llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> ()
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -873,8 +922,8 @@ llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> ()
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(runtime) ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(runtime) ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 69, i64 1, i64 %{{.*}}, i64 1, i64 1)
   // CHECK: call void @__kmpc_dispatch_fini_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -883,6 +932,8 @@ llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) ->
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -891,8 +942,8 @@ llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) ->
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(guided) ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(guided) ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 68, i64 1, i64 %{{.*}}, i64 1, i64 1)
   // CHECK: call void @__kmpc_dispatch_fini_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -901,6 +952,8 @@ llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) ->
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -909,8 +962,8 @@ llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) ->
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(dynamic, nonmonotonic) ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic, nonmonotonic) ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741891, i64 1, i64 %{{.*}}, i64 1, i64 1)
   // CHECK: call void @__kmpc_dispatch_fini_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -919,6 +972,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %s
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -927,8 +982,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %s
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_dynamic_monotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(dynamic, monotonic) ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic, monotonic) ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 536870979, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -937,6 +992,8 @@ llvm.func @test_omp_wsloop_dynamic_monotonic_ordered(%lb : i64, %ub : i64, %step
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -1103,8 +1160,8 @@ llvm.func @collapse_wsloop(
 // CHECK: %[[TOTAL_SUB_1:.*]] = sub i32 %[[TOTAL]], 1
 // CHECK: store i32 %[[TOTAL_SUB_1]], ptr
 // CHECK: call void @__kmpc_for_static_init_4u
-  omp.wsloop
-  for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
+  omp.wsloop {
+    omp.loopnest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
      %31 = llvm.load %20 : !llvm.ptr -> i32
      %32 = llvm.add %31, %arg0 : i32
      %33 = llvm.add %32, %arg1 : i32
@@ -1113,6 +1170,8 @@ llvm.func @collapse_wsloop(
      omp.yield
    }
    omp.terminator
+  }
+  omp.terminator
 }
   llvm.return
 }
@@ -1164,8 +1223,8 @@ llvm.func @collapse_wsloop_dynamic(
 // CHECK: store i32 1, ptr
 // CHECK: store i32 %[[TOTAL]], ptr
 // CHECK: call void @__kmpc_dispatch_init_4u
-  omp.wsloop schedule(dynamic)
-  for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
+  omp.wsloop schedule(dynamic) {
+    omp.loopnest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
      %31 = llvm.load %20 : !llvm.ptr -> i32
      %32 = llvm.add %31, %arg0 : i32
      %33 = llvm.add %32, %arg1 : i32
@@ -1174,6 +1233,8 @@ llvm.func @collapse_wsloop_dynamic(
      omp.yield
    }
    omp.terminator
+  }
+  omp.terminator
 }
   llvm.return
 }
@@ -1196,8 +1257,8 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
   // CHECK: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[OMP_THREAD]])
   }
-  omp.wsloop ordered(0)
-  for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
+  omp.wsloop ordered(0) {
+    omp.loopnest (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
     // CHECK: call void @__kmpc_ordered(ptr @[[GLOB3:[0-9]+]], i32 [[OMP_THREAD2:%.*]])
     omp.ordered_region {
       omp.terminator
@@ -1205,9 +1266,11 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
     }
     omp.yield
   }
+    omp.terminator
+  }
-  omp.wsloop ordered(1)
-  for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
+  omp.wsloop ordered(1) {
+    omp.loopnest (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
     // CHECK: [[TMP:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR]], i64 0, i64 0
     // CHECK: store i64 [[ARG0:%.*]], ptr [[TMP]], align 8
     // CHECK: [[TMP2:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR]], i64 0, i64 0
@@ -1224,9 +1287,11 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
     omp.yield
   }
+    omp.terminator
+  }
-  omp.wsloop ordered(2)
-  for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
+  omp.wsloop ordered(2) {
+    omp.loopnest (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
     // CHECK: [[TMP5:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR5]], i64 0, i64 0
     // CHECK: store i64 [[ARG0]], ptr [[TMP5]], align 8
     // CHECK: [[TMP6:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR5]], i64 0, i64 1
@@ -1254,6 +1319,8 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
     omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -2122,11 +2189,14 @@ llvm.func @omp_sections_with_clauses() -> () {
 // introduction mechanism itself is tested elsewhere.
 // CHECK-LABEL: @repeated_successor
 llvm.func @repeated_successor(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: i1) {
-  omp.wsloop for (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
+  omp.wsloop {
+    omp.loopnest (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
     llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64)
   ^bb1(%0: i64):  // 2 preds: ^bb0, ^bb0
     omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -2549,8 +2619,8 @@ llvm.func @omp_opaque_pointers(%arg0 : !llvm.ptr, %arg1: !llvm.ptr, %expr: i32)
 // CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1
 // CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1
 // CHECK: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 1
-module attributes {omp.flags = #omp.flags} {}
 // -----
@@ -2595,8 +2665,8 @@ module attributes {omp.version = #omp.version} {}
 // CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
 // CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
 // CHECK: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
-module attributes {omp.flags = #omp.flags} {}
 // -----
diff --git a/mlir/test/Target/LLVMIR/openmp-nested.mlir b/mlir/test/Target/LLVMIR/openmp-nested.mlir
index e1fdfdd24a3cb..35bc63fb3b4a5 100644
--- a/mlir/test/Target/LLVMIR/openmp-nested.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-nested.mlir
@@ -11,13 +11,17 @@ module {
       %2 = llvm.mlir.constant(0 : index) : i64
       %4 = llvm.mlir.constant(0 : i32) : i32
       %12 = llvm.alloca %0 x i64 : (i64) -> !llvm.ptr
-      omp.wsloop for (%arg2) : i64 = (%2) to (%1) step (%0) {
+      omp.wsloop {
+        omp.loopnest (%arg2) : i64 = (%2) to (%1) step (%0) {
         omp.parallel {
-          omp.wsloop for (%arg3) : i64 = (%2) to (%0) step (%0) {
+          omp.wsloop {
+            omp.loopnest (%arg3) : i64 = (%2) to (%0) step (%0) {
             llvm.store %2, %12 : i64, !llvm.ptr
             omp.yield
           }
           omp.terminator
+          }
+          omp.terminator
         }
         %19 = llvm.load %12 : !llvm.ptr -> i64
         %20 = llvm.trunc %19 : i64 to i32
@@ -27,6 +31,8 @@ module {
         omp.yield
       }
       omp.terminator
+      }
+      omp.terminator
     }
     %a4 = llvm.mlir.constant(0 : i32) : i32
     llvm.return %a4 : i32
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
index 9543458e950be..deeaf8219ce95 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
@@ -26,8 +26,8 @@ llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
   %c1 = llvm.mlir.constant(1 : i32) : i32
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   omp.parallel {
-    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr)
-    for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
+      omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
       %1 = llvm.mlir.constant(2.0 : f32) : f32
       %2 = llvm.load %prv : !llvm.ptr -> f32
       %3 = llvm.fadd %1, %2 : f32
@@ -35,6 +35,8 @@ llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
       omp.yield
     }
     omp.terminator
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -105,8 +107,8 @@ llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   omp.parallel {
-    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr)
-    for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr) {
+      omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
       %1 = llvm.mlir.constant(2.0 : f32) : f32
       %3 = llvm.load %prv0 : !llvm.ptr -> f32
       %4 = llvm.fadd %3, %1 : f32
@@ -117,6 +119,8 @@ llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
       omp.yield
     }
     omp.terminator
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -195,8 +199,8 @@ llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   omp.parallel {
-    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr)
-    for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr) {
+      omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
       %1 = llvm.mlir.constant(2.0 : f32) : f32
       %3 = llvm.load %prv0 : !llvm.ptr -> f32
       %4 = llvm.fadd %3, %1 : f32
@@ -204,6 +208,8 @@ llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
       omp.yield
     }
     omp.terminator
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -280,8 +286,8 @@ llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
   %c1 = llvm.mlir.constant(1 : i32) : i32
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   omp.parallel {
-    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr)
-    for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
+      omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
       %1 = llvm.mlir.constant(2.0 : f32) : f32
       %2 = llvm.load %prv : !llvm.ptr -> f32
      %3 = llvm.fadd %2, %1 : f32
@@ -292,6 +298,8 @@ llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
       omp.yield
     }
     omp.terminator
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -374,8 +382,8 @@ llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   omp.parallel {
-    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @mul_f32 %2 -> %prv1 : !llvm.ptr)
-    for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @mul_f32 %2 -> %prv1 : !llvm.ptr) {
+      omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
       %1 = llvm.mlir.constant(2.0 : f32) : f32
       %3 = llvm.load %prv0 : !llvm.ptr -> f32
       %4 = llvm.fadd %3, %1 : f32
@@ -386,6 +394,8 @@ llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
       omp.yield
     }
     omp.terminator
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -529,9 +539,10 @@ llvm.func @parallel_nested_workshare_reduction(%ub : i64) {
   %lb = llvm.mlir.constant(1 : i64) : i64
   %step = llvm.mlir.constant(1 : i64) : i64
-
+
   omp.parallel reduction(@add_i32 %0 -> %prv : !llvm.ptr) {
-    omp.wsloop for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    omp.wsloop {
+      omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
       %ival = llvm.trunc %iv : i64 to i32
       %lprv = llvm.load %prv : !llvm.ptr -> i32
       %add = llvm.add %lprv, %ival : i32
@@ -539,6 +550,8 @@ llvm.func @parallel_nested_workshare_reduction(%ub : i64) {
       omp.yield
     }
    omp.terminator
+    }
+    omp.terminator
  }
  llvm.return
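All of the test hunks above encode the same mechanical rewrite: the loop bounds move off `omp.wsloop` onto a nested loop operation, and each region gains its own terminator. A minimal before/after sketch, assuming the operation names used in this patch (`omp.wsloop`, `omp.loopnest`) and placeholder values `%lb`, `%ub`, `%step`, and `@body` taken from the tests above:

// Old form: the induction variable and bounds are attached directly to omp.wsloop.
omp.wsloop for (%iv) : i32 = (%lb) to (%ub) step (%step) {
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
}

// New form: omp.wsloop is a wrapper region; the nested loop op carries the
// bounds and yields, and the wrapper region is closed with omp.terminator.
omp.wsloop {
  omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
    llvm.call @body(%iv) : (i32) -> ()
    omp.yield
  }
  omp.terminator
}

Clause operands such as schedule, ordered, and reduction stay on the `omp.wsloop` wrapper in the new form, which is why the hunks only ever touch the loop header and the closing braces of each test.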