diff --git a/flang/include/flang/Semantics/openmp-directive-sets.h b/flang/include/flang/Semantics/openmp-directive-sets.h
index a4f27b00152e2..a40073ff47914 100644
--- a/flang/include/flang/Semantics/openmp-directive-sets.h
+++ b/flang/include/flang/Semantics/openmp-directive-sets.h
@@ -24,68 +24,80 @@ namespace llvm::omp {
 //  - allSet: All standalone or combined uses of the directive.
 
 static const OmpDirectiveSet topParallelSet{
-    Directive::OMPD_parallel,
-    Directive::OMPD_parallel_do,
     Directive::OMPD_parallel_do_simd,
+    Directive::OMPD_parallel_do,
+    Directive::OMPD_parallel_masked_taskloop_simd,
+    Directive::OMPD_parallel_masked_taskloop,
+    Directive::OMPD_parallel_master_taskloop_simd,
+    Directive::OMPD_parallel_master_taskloop,
     Directive::OMPD_parallel_sections,
     Directive::OMPD_parallel_workshare,
+    Directive::OMPD_parallel,
 };
 
 static const OmpDirectiveSet allParallelSet{
-    Directive::OMPD_distribute_parallel_do,
-    Directive::OMPD_distribute_parallel_do_simd,
-    Directive::OMPD_parallel,
-    Directive::OMPD_parallel_do,
-    Directive::OMPD_parallel_do_simd,
-    Directive::OMPD_parallel_sections,
-    Directive::OMPD_parallel_workshare,
-    Directive::OMPD_target_parallel,
-    Directive::OMPD_target_parallel_do,
-    Directive::OMPD_target_parallel_do_simd,
-    Directive::OMPD_target_teams_distribute_parallel_do,
-    Directive::OMPD_target_teams_distribute_parallel_do_simd,
-    Directive::OMPD_teams_distribute_parallel_do,
-    Directive::OMPD_teams_distribute_parallel_do_simd,
+    OmpDirectiveSet{
+        Directive::OMPD_distribute_parallel_do_simd,
+        Directive::OMPD_distribute_parallel_do,
+        Directive::OMPD_target_parallel_do_simd,
+        Directive::OMPD_target_parallel_do,
+        Directive::OMPD_target_parallel,
+        Directive::OMPD_target_teams_distribute_parallel_do_simd,
+        Directive::OMPD_target_teams_distribute_parallel_do,
+        Directive::OMPD_teams_distribute_parallel_do_simd,
+        Directive::OMPD_teams_distribute_parallel_do,
+    } | topParallelSet,
 };
 
 static const OmpDirectiveSet topDoSet{
-    Directive::OMPD_do,
     Directive::OMPD_do_simd,
+    Directive::OMPD_do,
 };
 
 static const OmpDirectiveSet allDoSet{
-    Directive::OMPD_distribute_parallel_do,
-    Directive::OMPD_distribute_parallel_do_simd,
-    Directive::OMPD_parallel_do,
-    Directive::OMPD_parallel_do_simd,
-    Directive::OMPD_do,
-    Directive::OMPD_do_simd,
-    Directive::OMPD_target_parallel_do,
-    Directive::OMPD_target_parallel_do_simd,
-    Directive::OMPD_target_teams_distribute_parallel_do,
-    Directive::OMPD_target_teams_distribute_parallel_do_simd,
-    Directive::OMPD_teams_distribute_parallel_do,
-    Directive::OMPD_teams_distribute_parallel_do_simd,
+    OmpDirectiveSet{
+        Directive::OMPD_distribute_parallel_do_simd,
+        Directive::OMPD_distribute_parallel_do,
+        Directive::OMPD_parallel_do_simd,
+        Directive::OMPD_parallel_do,
+        Directive::OMPD_target_parallel_do_simd,
+        Directive::OMPD_target_parallel_do,
+        Directive::OMPD_target_teams_distribute_parallel_do_simd,
+        Directive::OMPD_target_teams_distribute_parallel_do,
+        Directive::OMPD_teams_distribute_parallel_do_simd,
+        Directive::OMPD_teams_distribute_parallel_do,
+    } | topDoSet,
 };
 
 static const OmpDirectiveSet topTaskloopSet{
-    Directive::OMPD_taskloop,
     Directive::OMPD_taskloop_simd,
+    Directive::OMPD_taskloop,
 };
 
-static const OmpDirectiveSet allTaskloopSet{topTaskloopSet};
+static const OmpDirectiveSet allTaskloopSet{
+    OmpDirectiveSet{
+        Directive::OMPD_masked_taskloop_simd,
+        Directive::OMPD_masked_taskloop,
+        Directive::OMPD_master_taskloop_simd,
+        Directive::OMPD_master_taskloop,
+        Directive::OMPD_parallel_masked_taskloop_simd,
+        Directive::OMPD_parallel_masked_taskloop,
+        Directive::OMPD_parallel_master_taskloop_simd,
+        Directive::OMPD_parallel_master_taskloop,
+    } | topTaskloopSet,
+};
 
 static const OmpDirectiveSet topTargetSet{
-    Directive::OMPD_target,
-    Directive::OMPD_target_parallel,
-    Directive::OMPD_target_parallel_do,
     Directive::OMPD_target_parallel_do_simd,
+    Directive::OMPD_target_parallel_do,
+    Directive::OMPD_target_parallel,
     Directive::OMPD_target_simd,
-    Directive::OMPD_target_teams,
-    Directive::OMPD_target_teams_distribute,
-    Directive::OMPD_target_teams_distribute_parallel_do,
     Directive::OMPD_target_teams_distribute_parallel_do_simd,
+    Directive::OMPD_target_teams_distribute_parallel_do,
     Directive::OMPD_target_teams_distribute_simd,
+    Directive::OMPD_target_teams_distribute,
+    Directive::OMPD_target_teams,
+    Directive::OMPD_target,
 };
 
 static const OmpDirectiveSet allTargetSet{topTargetSet};
 
@@ -95,61 +107,61 @@ static const OmpDirectiveSet topSimdSet{
 };
 
 static const OmpDirectiveSet allSimdSet{
-    Directive::OMPD_distribute_parallel_do_simd,
-    Directive::OMPD_distribute_simd,
-    Directive::OMPD_do_simd,
-    Directive::OMPD_parallel_do_simd,
-    Directive::OMPD_simd,
-    Directive::OMPD_target_parallel_do_simd,
-    Directive::OMPD_target_simd,
-    Directive::OMPD_target_teams_distribute_parallel_do_simd,
-    Directive::OMPD_target_teams_distribute_simd,
-    Directive::OMPD_taskloop_simd,
-    Directive::OMPD_teams_distribute_parallel_do_simd,
-    Directive::OMPD_teams_distribute_simd,
+    OmpDirectiveSet{
+        Directive::OMPD_distribute_parallel_do_simd,
+        Directive::OMPD_distribute_simd,
+        Directive::OMPD_do_simd,
+        Directive::OMPD_masked_taskloop_simd,
+        Directive::OMPD_master_taskloop_simd,
+        Directive::OMPD_parallel_do_simd,
+        Directive::OMPD_parallel_masked_taskloop_simd,
+        Directive::OMPD_parallel_master_taskloop_simd,
+        Directive::OMPD_target_parallel_do_simd,
+        Directive::OMPD_target_simd,
+        Directive::OMPD_target_teams_distribute_parallel_do_simd,
+        Directive::OMPD_target_teams_distribute_simd,
+        Directive::OMPD_taskloop_simd,
+        Directive::OMPD_teams_distribute_parallel_do_simd,
+        Directive::OMPD_teams_distribute_simd,
+    } | topSimdSet,
 };
 
 static const OmpDirectiveSet topTeamsSet{
-    Directive::OMPD_teams,
-    Directive::OMPD_teams_distribute,
-    Directive::OMPD_teams_distribute_parallel_do,
     Directive::OMPD_teams_distribute_parallel_do_simd,
+    Directive::OMPD_teams_distribute_parallel_do,
     Directive::OMPD_teams_distribute_simd,
+    Directive::OMPD_teams_distribute,
+    Directive::OMPD_teams,
 };
 
 static const OmpDirectiveSet allTeamsSet{
-    llvm::omp::OMPD_target_teams,
-    llvm::omp::OMPD_target_teams_distribute,
-    llvm::omp::OMPD_target_teams_distribute_parallel_do,
-    llvm::omp::OMPD_target_teams_distribute_parallel_do_simd,
-    llvm::omp::OMPD_target_teams_distribute_simd,
-    llvm::omp::OMPD_teams,
-    llvm::omp::OMPD_teams_distribute,
-    llvm::omp::OMPD_teams_distribute_parallel_do,
-    llvm::omp::OMPD_teams_distribute_parallel_do_simd,
-    llvm::omp::OMPD_teams_distribute_simd,
+    OmpDirectiveSet{
+        llvm::omp::OMPD_target_teams_distribute_parallel_do_simd,
+        llvm::omp::OMPD_target_teams_distribute_parallel_do,
+        llvm::omp::OMPD_target_teams_distribute_simd,
+        llvm::omp::OMPD_target_teams_distribute,
+        llvm::omp::OMPD_target_teams,
+    } | topTeamsSet,
 };
 
 static const OmpDirectiveSet topDistributeSet{
-    Directive::OMPD_distribute,
-    Directive::OMPD_distribute_parallel_do,
     Directive::OMPD_distribute_parallel_do_simd,
+    Directive::OMPD_distribute_parallel_do,
     Directive::OMPD_distribute_simd,
+    Directive::OMPD_distribute,
 };
 
 static const OmpDirectiveSet allDistributeSet{
-    llvm::omp::OMPD_distribute,
-    llvm::omp::OMPD_distribute_parallel_do,
-    llvm::omp::OMPD_distribute_parallel_do_simd,
-    llvm::omp::OMPD_distribute_simd,
-    llvm::omp::OMPD_target_teams_distribute,
-    llvm::omp::OMPD_target_teams_distribute_parallel_do,
-    llvm::omp::OMPD_target_teams_distribute_parallel_do_simd,
-    llvm::omp::OMPD_target_teams_distribute_simd,
-    llvm::omp::OMPD_teams_distribute,
-    llvm::omp::OMPD_teams_distribute_parallel_do,
-    llvm::omp::OMPD_teams_distribute_parallel_do_simd,
-    llvm::omp::OMPD_teams_distribute_simd,
+    OmpDirectiveSet{
+        llvm::omp::OMPD_target_teams_distribute_parallel_do_simd,
+        llvm::omp::OMPD_target_teams_distribute_parallel_do,
+        llvm::omp::OMPD_target_teams_distribute_simd,
+        llvm::omp::OMPD_target_teams_distribute,
+        llvm::omp::OMPD_teams_distribute_parallel_do_simd,
+        llvm::omp::OMPD_teams_distribute_parallel_do,
+        llvm::omp::OMPD_teams_distribute_simd,
+        llvm::omp::OMPD_teams_distribute,
+    } | topDistributeSet,
 };
 
 //===----------------------------------------------------------------------===//
@@ -188,8 +200,16 @@ static const OmpDirectiveSet loopConstructSet{
     Directive::OMPD_distribute,
     Directive::OMPD_do_simd,
     Directive::OMPD_do,
+    Directive::OMPD_masked_taskloop,
+    Directive::OMPD_masked_taskloop_simd,
+    Directive::OMPD_master_taskloop,
+    Directive::OMPD_master_taskloop_simd,
     Directive::OMPD_parallel_do_simd,
     Directive::OMPD_parallel_do,
+    Directive::OMPD_parallel_masked_taskloop,
+    Directive::OMPD_parallel_masked_taskloop_simd,
+    Directive::OMPD_parallel_master_taskloop,
+    Directive::OMPD_parallel_master_taskloop_simd,
     Directive::OMPD_simd,
     Directive::OMPD_target_parallel_do_simd,
     Directive::OMPD_target_parallel_do,
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 833989cdc7023..4cb2aa74e1791 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -230,17 +230,30 @@ addUseDeviceClause(Fortran::lower::AbstractConverter &converter,
   }
 }
 
+static void convertLoopBounds(Fortran::lower::AbstractConverter &converter,
+                              mlir::Location loc, CollapseClauseOps &ops,
+                              std::size_t loopVarTypeSize) {
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  // The types of lower bound, upper bound, and step are converted into the
+  // type of the loop variable if necessary.
+  mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
+  for (unsigned it = 0; it < (unsigned)ops.loopLBVar.size(); it++) {
+    ops.loopLBVar[it] =
+        firOpBuilder.createConvert(loc, loopVarType, ops.loopLBVar[it]);
+    ops.loopUBVar[it] =
+        firOpBuilder.createConvert(loc, loopVarType, ops.loopUBVar[it]);
+    ops.loopStepVar[it] =
+        firOpBuilder.createConvert(loc, loopVarType, ops.loopStepVar[it]);
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // ClauseProcessor unique clauses
 //===----------------------------------------------------------------------===//
 
-bool ClauseProcessor::processCollapse(
-    mlir::Location currentLocation, Fortran::lower::pft::Evaluation &eval,
-    llvm::SmallVectorImpl<mlir::Value> &lowerBound,
-    llvm::SmallVectorImpl<mlir::Value> &upperBound,
-    llvm::SmallVectorImpl<mlir::Value> &step,
-    llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &iv,
-    std::size_t &loopVarTypeSize) const {
+bool ClauseProcessor::processCollapse(mlir::Location currentLocation,
+                                      Fortran::lower::pft::Evaluation &eval,
+                                      CollapseClauseOps &ops) const {
   bool found = false;
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
@@ -259,7 +272,7 @@ bool ClauseProcessor::processCollapse(
     found = true;
   }
 
-  loopVarTypeSize = 0;
+  std::size_t loopVarTypeSize = 0;
   do {
     Fortran::lower::pft::Evaluation *doLoop =
         &doConstructEval->getFirstNestedEvaluation();
@@ -271,18 +284,18 @@ bool ClauseProcessor::processCollapse(
         std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
     assert(bounds && "Expected bounds for worksharing do loop");
     Fortran::lower::StatementContext stmtCtx;
-    lowerBound.push_back(fir::getBase(converter.genExprValue(
+    ops.loopLBVar.push_back(fir::getBase(converter.genExprValue(
         *Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
-    upperBound.push_back(fir::getBase(converter.genExprValue(
+    ops.loopUBVar.push_back(fir::getBase(converter.genExprValue(
         *Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
     if (bounds->step) {
-      step.push_back(fir::getBase(converter.genExprValue(
+      ops.loopStepVar.push_back(fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
     } else { // If `step` is not present, assume it as `1`.
- step.push_back(firOpBuilder.createIntegerConstant( + ops.loopStepVar.push_back(firOpBuilder.createIntegerConstant( currentLocation, firOpBuilder.getIntegerType(32), 1)); } - iv.push_back(bounds->name.thing.symbol); + ops.loopIV.push_back(bounds->name.thing.symbol); loopVarTypeSize = std::max(loopVarTypeSize, bounds->name.thing.symbol->GetUltimate().size()); collapseValue--; @@ -290,6 +303,9 @@ bool ClauseProcessor::processCollapse( &*std::next(doConstructEval->getNestedEvaluations().begin()); } while (collapseValue > 0); + if (found) + convertLoopBounds(converter, currentLocation, ops, loopVarTypeSize); + return found; } @@ -316,7 +332,7 @@ bool ClauseProcessor::processDefault() const { } bool ClauseProcessor::processDevice(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + DeviceClauseOps &result) const { const Fortran::parser::CharBlock *source = nullptr; if (auto *deviceClause = findUniqueClause(&source)) { mlir::Location clauseLocation = converter.genLocation(*source); @@ -330,26 +346,26 @@ bool ClauseProcessor::processDevice(Fortran::lower::StatementContext &stmtCtx, } if (const auto *deviceExpr = Fortran::semantics::GetExpr( std::get(deviceClause->v.t))) { - result = fir::getBase(converter.genExprValue(*deviceExpr, stmtCtx)); + result.deviceVar = + fir::getBase(converter.genExprValue(*deviceExpr, stmtCtx)); } return true; } return false; } -bool ClauseProcessor::processDeviceType( - mlir::omp::DeclareTargetDeviceType &result) const { +bool ClauseProcessor::processDeviceType(DeviceTypeClauseOps &result) const { if (auto *deviceTypeClause = findUniqueClause()) { // Case: declare target ... device_type(any | host | nohost) switch (deviceTypeClause->v.v) { case Fortran::parser::OmpDeviceTypeClause::Type::Nohost: - result = mlir::omp::DeclareTargetDeviceType::nohost; + result.deviceType = mlir::omp::DeclareTargetDeviceType::nohost; break; case Fortran::parser::OmpDeviceTypeClause::Type::Host: - result = mlir::omp::DeclareTargetDeviceType::host; + result.deviceType = mlir::omp::DeclareTargetDeviceType::host; break; case Fortran::parser::OmpDeviceTypeClause::Type::Any: - result = mlir::omp::DeclareTargetDeviceType::any; + result.deviceType = mlir::omp::DeclareTargetDeviceType::any; break; } return true; @@ -358,7 +374,7 @@ bool ClauseProcessor::processDeviceType( } bool ClauseProcessor::processFinal(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + FinalClauseOps &result) const { const Fortran::parser::CharBlock *source = nullptr; if (auto *finalClause = findUniqueClause(&source)) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -366,38 +382,38 @@ bool ClauseProcessor::processFinal(Fortran::lower::StatementContext &stmtCtx, mlir::Value finalVal = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(finalClause->v), stmtCtx)); - result = firOpBuilder.createConvert(clauseLocation, - firOpBuilder.getI1Type(), finalVal); + result.finalVar = firOpBuilder.createConvert( + clauseLocation, firOpBuilder.getI1Type(), finalVal); return true; } return false; } -bool ClauseProcessor::processHint(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processHint(HintClauseOps &result) const { if (auto *hintClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); const auto *expr = Fortran::semantics::GetExpr(hintClause->v); int64_t hintValue = *Fortran::evaluate::ToInt64(*expr); - result = firOpBuilder.getI64IntegerAttr(hintValue); + result.hintAttr = 
firOpBuilder.getI64IntegerAttr(hintValue); return true; } return false; } -bool ClauseProcessor::processMergeable(mlir::UnitAttr &result) const { - return markClauseOccurrence(result); +bool ClauseProcessor::processMergeable(MergeableClauseOps &result) const { + return markClauseOccurrence(result.mergeableAttr); } -bool ClauseProcessor::processNowait(mlir::UnitAttr &result) const { - return markClauseOccurrence(result); +bool ClauseProcessor::processNowait(NowaitClauseOps &result) const { + return markClauseOccurrence(result.nowaitAttr); } bool ClauseProcessor::processNumTeams(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + NumTeamsClauseOps &result) const { // TODO Get lower and upper bounds for num_teams when parser is updated to // accept both. if (auto *numTeamsClause = findUniqueClause()) { - result = fir::getBase(converter.genExprValue( + result.numTeamsUpperVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(numTeamsClause->v), stmtCtx)); return true; } @@ -405,17 +421,18 @@ bool ClauseProcessor::processNumTeams(Fortran::lower::StatementContext &stmtCtx, } bool ClauseProcessor::processNumThreads( - Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const { + Fortran::lower::StatementContext &stmtCtx, + NumThreadsClauseOps &result) const { if (auto *numThreadsClause = findUniqueClause()) { // OMPIRBuilder expects `NUM_THREADS` clause as a `Value`. - result = fir::getBase(converter.genExprValue( + result.numThreadsVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(numThreadsClause->v), stmtCtx)); return true; } return false; } -bool ClauseProcessor::processOrdered(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processOrdered(OrderedClauseOps &result) const { if (auto *orderedClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); int64_t orderedClauseValue = 0l; @@ -423,48 +440,45 @@ bool ClauseProcessor::processOrdered(mlir::IntegerAttr &result) const { const auto *expr = Fortran::semantics::GetExpr(orderedClause->v); orderedClauseValue = *Fortran::evaluate::ToInt64(*expr); } - result = firOpBuilder.getI64IntegerAttr(orderedClauseValue); + result.orderedAttr = firOpBuilder.getI64IntegerAttr(orderedClauseValue); return true; } return false; } bool ClauseProcessor::processPriority(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + PriorityClauseOps &result) const { if (auto *priorityClause = findUniqueClause()) { - result = fir::getBase(converter.genExprValue( + result.priorityVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(priorityClause->v), stmtCtx)); return true; } return false; } -bool ClauseProcessor::processProcBind( - mlir::omp::ClauseProcBindKindAttr &result) const { +bool ClauseProcessor::processProcBind(ProcBindClauseOps &result) const { if (auto *procBindClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - result = genProcBindKindAttr(firOpBuilder, procBindClause); + result.procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause); return true; } return false; } -bool ClauseProcessor::processSafelen(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processSafelen(SafelenClauseOps &result) const { if (auto *safelenClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); const auto *expr = Fortran::semantics::GetExpr(safelenClause->v); const std::optional safelenVal = Fortran::evaluate::ToInt64(*expr); 
- result = firOpBuilder.getI64IntegerAttr(*safelenVal); + result.safelenAttr = firOpBuilder.getI64IntegerAttr(*safelenVal); return true; } return false; } -bool ClauseProcessor::processSchedule( - mlir::omp::ClauseScheduleKindAttr &valAttr, - mlir::omp::ScheduleModifierAttr &modifierAttr, - mlir::UnitAttr &simdModifierAttr) const { +bool ClauseProcessor::processSchedule(Fortran::lower::StatementContext &stmtCtx, + ScheduleClauseOps &result) const { if (auto *scheduleClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::MLIRContext *context = firOpBuilder.getContext(); @@ -496,26 +510,21 @@ bool ClauseProcessor::processSchedule( getScheduleModifier(scheduleClause->v); if (scheduleModifier != mlir::omp::ScheduleModifier::none) - modifierAttr = + result.scheduleModAttr = mlir::omp::ScheduleModifierAttr::get(context, scheduleModifier); if (getSimdModifier(scheduleClause->v) != mlir::omp::ScheduleModifier::none) - simdModifierAttr = firOpBuilder.getUnitAttr(); + result.scheduleSimdAttr = firOpBuilder.getUnitAttr(); - valAttr = mlir::omp::ClauseScheduleKindAttr::get(context, scheduleKind); - return true; - } - return false; -} + result.scheduleValAttr = + mlir::omp::ClauseScheduleKindAttr::get(context, scheduleKind); -bool ClauseProcessor::processScheduleChunk( - Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const { - if (auto *scheduleClause = findUniqueClause()) { if (const auto &chunkExpr = std::get>( scheduleClause->v.t)) { if (const auto *expr = Fortran::semantics::GetExpr(*chunkExpr)) { - result = fir::getBase(converter.genExprValue(*expr, stmtCtx)); + result.scheduleChunkVar = + fir::getBase(converter.genExprValue(*expr, stmtCtx)); } } return true; @@ -523,48 +532,47 @@ bool ClauseProcessor::processScheduleChunk( return false; } -bool ClauseProcessor::processSimdlen(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processSimdlen(SimdlenClauseOps &result) const { if (auto *simdlenClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); const auto *expr = Fortran::semantics::GetExpr(simdlenClause->v); const std::optional simdlenVal = Fortran::evaluate::ToInt64(*expr); - result = firOpBuilder.getI64IntegerAttr(*simdlenVal); + result.simdlenAttr = firOpBuilder.getI64IntegerAttr(*simdlenVal); return true; } return false; } bool ClauseProcessor::processThreadLimit( - Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const { + Fortran::lower::StatementContext &stmtCtx, + ThreadLimitClauseOps &result) const { if (auto *threadLmtClause = findUniqueClause()) { - result = fir::getBase(converter.genExprValue( + result.threadLimitVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(threadLmtClause->v), stmtCtx)); return true; } return false; } -bool ClauseProcessor::processUntied(mlir::UnitAttr &result) const { - return markClauseOccurrence(result); +bool ClauseProcessor::processUntied(UntiedClauseOps &result) const { + return markClauseOccurrence(result.untiedAttr); } //===----------------------------------------------------------------------===// // ClauseProcessor repeatable clauses //===----------------------------------------------------------------------===// -bool ClauseProcessor::processAllocate( - llvm::SmallVectorImpl &allocatorOperands, - llvm::SmallVectorImpl &allocateOperands) const { +bool ClauseProcessor::processAllocate(AllocateClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Allocate *allocateClause, const 
Fortran::parser::CharBlock &) { - genAllocateClause(converter, allocateClause->v, allocatorOperands, - allocateOperands); + genAllocateClause(converter, allocateClause->v, result.allocatorVars, + result.allocateVars); }); } -bool ClauseProcessor::processCopyin() const { +bool ClauseProcessor::processCopyin(CopyinClauseOps &) const { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); @@ -709,10 +717,8 @@ createCopyFunc(mlir::Location loc, Fortran::lower::AbstractConverter &converter, return funcOp; } -bool ClauseProcessor::processCopyPrivate( - mlir::Location currentLocation, - llvm::SmallVectorImpl ©PrivateVars, - llvm::SmallVectorImpl ©PrivateFuncs) const { +bool ClauseProcessor::processCopyprivate(mlir::Location currentLocation, + CopyprivateClauseOps &result) const { auto addCopyPrivateVar = [&](Fortran::semantics::Symbol *sym) { mlir::Value symVal = converter.getSymbolAddress(*sym); auto declOp = symVal.getDefiningOp(); @@ -739,10 +745,10 @@ bool ClauseProcessor::processCopyPrivate( cpVar = alloca; } - copyPrivateVars.push_back(cpVar); + result.copyprivateVars.push_back(cpVar); mlir::func::FuncOp funcOp = createCopyFunc(currentLocation, converter, cpVar.getType(), attrs); - copyPrivateFuncs.push_back(mlir::SymbolRefAttr::get(funcOp)); + result.copyprivateFuncs.push_back(mlir::SymbolRefAttr::get(funcOp)); }; bool hasCopyPrivate = findRepeatableClause( @@ -765,9 +771,7 @@ bool ClauseProcessor::processCopyPrivate( return hasCopyPrivate; } -bool ClauseProcessor::processDepend( - llvm::SmallVectorImpl &dependTypeOperands, - llvm::SmallVectorImpl &dependOperands) const { +bool ClauseProcessor::processDepend(DependClauseOps &result) const { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); return findRepeatableClause( @@ -780,8 +784,8 @@ bool ClauseProcessor::processDepend( .t); mlir::omp::ClauseTaskDependAttr dependTypeOperand = genDependKindAttr(firOpBuilder, dependClause); - dependTypeOperands.insert(dependTypeOperands.end(), depVal.size(), - dependTypeOperand); + result.dependTypeAttrs.insert(result.dependTypeAttrs.end(), + depVal.size(), dependTypeOperand); for (const Fortran::parser::Designator &ompObject : depVal) { Fortran::semantics::Symbol *sym = nullptr; std::visit( @@ -803,14 +807,14 @@ bool ClauseProcessor::processDepend( }}, (ompObject).u); const mlir::Value variable = converter.getSymbolAddress(*sym); - dependOperands.push_back(variable); + result.dependVars.push_back(variable); } }); } bool ClauseProcessor::processIf( Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName, - mlir::Value &result) const { + IfClauseOps &result) const { bool found = false; findRepeatableClause( [&](const ClauseTy::If *ifClause, @@ -821,21 +825,21 @@ bool ClauseProcessor::processIf( // Assume that, at most, a single 'if' clause will be applicable to the // given directive. if (operand) { - result = operand; + result.ifVar = operand; found = true; } }); return found; } -bool ClauseProcessor::processLink( - llvm::SmallVectorImpl &result) const { +bool ClauseProcessor::processLink(EnterLinkToClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Link *linkClause, const Fortran::parser::CharBlock &) { // Case: declare target link(var1, var2)... 
- gatherFuncAndVarSyms( - linkClause->v, mlir::omp::DeclareTargetCaptureClause::link, result); + gatherFuncAndVarSyms(linkClause->v, + mlir::omp::DeclareTargetCaptureClause::link, + result.symbolAndClause); }); } @@ -863,14 +867,9 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc, return op; } -bool ClauseProcessor::processMap( - mlir::Location currentLocation, const llvm::omp::Directive &directive, - Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands, - llvm::SmallVectorImpl *mapSymTypes, - llvm::SmallVectorImpl *mapSymLocs, - llvm::SmallVectorImpl *mapSymbols) - const { +bool ClauseProcessor::processMap(mlir::Location currentLocation, + Fortran::lower::StatementContext &stmtCtx, + MapClauseOps &result) const { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); return findRepeatableClause( [&](const ClauseTy::Map *mapClause, @@ -946,100 +945,92 @@ bool ClauseProcessor::processMap( mapTypeBits), mlir::omp::VariableCaptureKind::ByRef, symAddr.getType()); - mapOperands.push_back(mapOp); - if (mapSymTypes) - mapSymTypes->push_back(symAddr.getType()); - if (mapSymLocs) - mapSymLocs->push_back(symAddr.getLoc()); - - if (mapSymbols) - mapSymbols->push_back(getOmpObjectSymbol(ompObject)); + result.mapVars.push_back(mapOp); + if (result.mapSymTypes) + result.mapSymTypes->push_back(symAddr.getType()); + if (result.mapSymLocs) + result.mapSymLocs->push_back(symAddr.getLoc()); + if (result.mapSymbols) + result.mapSymbols->push_back(getOmpObjectSymbol(ompObject)); } }); } bool ClauseProcessor::processTargetReduction( - llvm::SmallVector &reductionSymbols) - const { + TargetReductionClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Reduction *reductionClause, const Fortran::parser::CharBlock &) { ReductionProcessor rp; - rp.addReductionSym(reductionClause->v, reductionSymbols); + rp.addReductionSym(reductionClause->v, result.targetReductionSymbols); }); } -bool ClauseProcessor::processReduction( - mlir::Location currentLocation, - llvm::SmallVectorImpl &reductionVars, - llvm::SmallVectorImpl &reductionDeclSymbols, - llvm::SmallVectorImpl *reductionSymbols) - const { +bool ClauseProcessor::processReduction(mlir::Location currentLocation, + ReductionClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Reduction *reductionClause, const Fortran::parser::CharBlock &) { ReductionProcessor rp; rp.addReductionDecl(currentLocation, converter, reductionClause->v, - reductionVars, reductionDeclSymbols, - reductionSymbols); + result.reductionVars, result.reductionDeclSymbols, + result.reductionSymbols ? &*result.reductionSymbols + : nullptr); + result.reductionTypes.reserve(result.reductionVars.size()); + llvm::transform(result.reductionVars, + std::back_inserter(result.reductionTypes), + [](mlir::Value v) { return v.getType(); }); }); } -bool ClauseProcessor::processSectionsReduction( - mlir::Location currentLocation) const { +bool ClauseProcessor::processSectionsReduction(mlir::Location currentLocation, + ReductionClauseOps &) const { return findRepeatableClause( [&](const ClauseTy::Reduction *, const Fortran::parser::CharBlock &) { + // Either implement special handling or remove this method and use the + // generic processReduction() method instead. 
TODO(currentLocation, "OMPC_Reduction"); }); } -bool ClauseProcessor::processTo( - llvm::SmallVectorImpl &result) const { +bool ClauseProcessor::processTo(EnterLinkToClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::To *toClause, const Fortran::parser::CharBlock &) { // Case: declare target to(func, var1, var2)... gatherFuncAndVarSyms(toClause->v, - mlir::omp::DeclareTargetCaptureClause::to, result); + mlir::omp::DeclareTargetCaptureClause::to, + result.symbolAndClause); }); } -bool ClauseProcessor::processEnter( - llvm::SmallVectorImpl &result) const { +bool ClauseProcessor::processEnter(EnterLinkToClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Enter *enterClause, const Fortran::parser::CharBlock &) { // Case: declare target enter(func, var1, var2)... gatherFuncAndVarSyms(enterClause->v, mlir::omp::DeclareTargetCaptureClause::enter, - result); + result.symbolAndClause); }); } -bool ClauseProcessor::processUseDeviceAddr( - llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl &useDeviceSymbols) - const { +bool ClauseProcessor::processUseDeviceAddr(UseDeviceClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::UseDeviceAddr *devAddrClause, const Fortran::parser::CharBlock &) { - addUseDeviceClause(converter, devAddrClause->v, operands, - useDeviceTypes, useDeviceLocs, useDeviceSymbols); + addUseDeviceClause(converter, devAddrClause->v, + result.useDeviceAddrVars, result.useDeviceTypes, + result.useDeviceLocs, result.useDeviceSymbols); }); } -bool ClauseProcessor::processUseDevicePtr( - llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl &useDeviceSymbols) - const { +bool ClauseProcessor::processUseDevicePtr(UseDeviceClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::UseDevicePtr *devPtrClause, const Fortran::parser::CharBlock &) { - addUseDeviceClause(converter, devPtrClause->v, operands, useDeviceTypes, - useDeviceLocs, useDeviceSymbols); + addUseDeviceClause(converter, devPtrClause->v, result.useDevicePtrVars, + result.useDeviceTypes, result.useDeviceLocs, + result.useDeviceSymbols); }); } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index 0b91aca3d1344..c6b0b73dd1a56 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -13,6 +13,7 @@ #define FORTRAN_LOWER_CLAUASEPROCESSOR_H #include "DirectivesCommon.h" +#include "OperationClauses.h" #include "ReductionProcessor.h" #include "Utils.h" #include "flang/Lower/AbstractConverter.h" @@ -54,101 +55,62 @@ class ClauseProcessor { : converter(converter), semaCtx(semaCtx), clauses(clauses) {} // 'Unique' clauses: They can appear at most once in the clause list. 
- bool - processCollapse(mlir::Location currentLocation, - Fortran::lower::pft::Evaluation &eval, - llvm::SmallVectorImpl &lowerBound, - llvm::SmallVectorImpl &upperBound, - llvm::SmallVectorImpl &step, - llvm::SmallVectorImpl &iv, - std::size_t &loopVarTypeSize) const; + bool processCollapse(mlir::Location currentLocation, + Fortran::lower::pft::Evaluation &eval, + CollapseClauseOps &result) const; bool processDefault() const; bool processDevice(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processDeviceType(mlir::omp::DeclareTargetDeviceType &result) const; + DeviceClauseOps &result) const; + bool processDeviceType(DeviceTypeClauseOps &result) const; bool processFinal(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processHint(mlir::IntegerAttr &result) const; - bool processMergeable(mlir::UnitAttr &result) const; - bool processNowait(mlir::UnitAttr &result) const; + FinalClauseOps &result) const; + bool processHint(HintClauseOps &result) const; + bool processMergeable(MergeableClauseOps &result) const; + bool processNowait(NowaitClauseOps &result) const; bool processNumTeams(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; + NumTeamsClauseOps &result) const; bool processNumThreads(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processOrdered(mlir::IntegerAttr &result) const; + NumThreadsClauseOps &result) const; + bool processOrdered(OrderedClauseOps &result) const; bool processPriority(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processProcBind(mlir::omp::ClauseProcBindKindAttr &result) const; - bool processSafelen(mlir::IntegerAttr &result) const; - bool processSchedule(mlir::omp::ClauseScheduleKindAttr &valAttr, - mlir::omp::ScheduleModifierAttr &modifierAttr, - mlir::UnitAttr &simdModifierAttr) const; - bool processScheduleChunk(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processSimdlen(mlir::IntegerAttr &result) const; + PriorityClauseOps &result) const; + bool processProcBind(ProcBindClauseOps &result) const; + bool processSafelen(SafelenClauseOps &result) const; + bool processSchedule(Fortran::lower::StatementContext &stmtCtx, + ScheduleClauseOps &result) const; + bool processSimdlen(SimdlenClauseOps &result) const; bool processThreadLimit(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processUntied(mlir::UnitAttr &result) const; + ThreadLimitClauseOps &result) const; + bool processUntied(UntiedClauseOps &result) const; // 'Repeatable' clauses: They can appear multiple times in the clause list. 
- bool - processAllocate(llvm::SmallVectorImpl &allocatorOperands, - llvm::SmallVectorImpl &allocateOperands) const; - bool processCopyin() const; - bool processCopyPrivate( - mlir::Location currentLocation, - llvm::SmallVectorImpl ©PrivateVars, - llvm::SmallVectorImpl ©PrivateFuncs) const; - bool processDepend(llvm::SmallVectorImpl &dependTypeOperands, - llvm::SmallVectorImpl &dependOperands) const; - bool - processEnter(llvm::SmallVectorImpl &result) const; + bool processAllocate(AllocateClauseOps &result) const; + bool processCopyin(CopyinClauseOps &result) const; + bool processCopyprivate(mlir::Location currentLocation, + CopyprivateClauseOps &result) const; + bool processDepend(DependClauseOps &result) const; + bool processEnter(EnterLinkToClauseOps &result) const; bool processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName, - mlir::Value &result) const; - bool - processLink(llvm::SmallVectorImpl &result) const; + IfClauseOps &result) const; + bool processLink(EnterLinkToClauseOps &result) const; // This method is used to process a map clause. - // The optional parameters - mapSymTypes, mapSymLocs & mapSymbols are used to - // store the original type, location and Fortran symbol for the map operands. - // They may be used later on to create the block_arguments for some of the - // target directives that require it. bool processMap(mlir::Location currentLocation, - const llvm::omp::Directive &directive, Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands, - llvm::SmallVectorImpl *mapSymTypes = nullptr, - llvm::SmallVectorImpl *mapSymLocs = nullptr, - llvm::SmallVectorImpl - *mapSymbols = nullptr) const; - bool - processReduction(mlir::Location currentLocation, - llvm::SmallVectorImpl &reductionVars, - llvm::SmallVectorImpl &reductionDeclSymbols, - llvm::SmallVectorImpl - *reductionSymbols = nullptr) const; - bool processTargetReduction( - llvm::SmallVector &reductionSymbols) - const; - bool processSectionsReduction(mlir::Location currentLocation) const; - bool processTo(llvm::SmallVectorImpl &result) const; - bool - processUseDeviceAddr(llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl - &useDeviceSymbols) const; - bool - processUseDevicePtr(llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl - &useDeviceSymbols) const; + MapClauseOps &result) const; + bool processReduction(mlir::Location currentLocation, + ReductionClauseOps &result) const; + bool processTargetReduction(TargetReductionClauseOps &result) const; + bool processSectionsReduction(mlir::Location currentLocation, + ReductionClauseOps &result) const; + bool processTo(EnterLinkToClauseOps &result) const; + bool processUseDeviceAddr(UseDeviceClauseOps &result) const; + bool processUseDevicePtr(UseDeviceClauseOps &result) const; template bool processMotionClauses(Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands); + MapClauseOps &result); // Call this method for these clauses that should be supported but are not // implemented yet. 
It triggers a compilation error if any of the given @@ -189,8 +151,7 @@ class ClauseProcessor { template bool ClauseProcessor::processMotionClauses( - Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands) { + Fortran::lower::StatementContext &stmtCtx, MapClauseOps &result) { return findRepeatableClause( [&](const T *motionClause, const Fortran::parser::CharBlock &source) { mlir::Location clauseLocation = converter.genLocation(source); @@ -232,7 +193,7 @@ bool ClauseProcessor::processMotionClauses( mapTypeBits), mlir::omp::VariableCaptureKind::ByRef, symAddr.getType()); - mapOperands.push_back(mapOp); + result.mapVars.push_back(mapOp); } }); } diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index 717b8cc0276a3..5c27a91bd469f 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -213,21 +213,23 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { firOpBuilder.restoreInsertionPoint(unstructuredSectionsIP); } } - } else if (mlir::isa(op)) { - // Update the original variable just before exiting the worksharing - // loop. Conversion as follows: + } else if (mlir::isa(op)) { + // TODO Check that the change from WsLoopOp to LoopNestOp didn't + // break anything here. + // Update the original variable just before exiting the loop. Conversion + // as follows: // - // omp.wsloop { - // omp.wsloop { ... - // ... store - // store ===> %v = arith.addi %iv, %step - // omp.yield %cmp = %step < 0 ? %v < %ub : %v > %ub - // } fir.if %cmp { - // fir.store %v to %loopIV - // ^%lpv_update_blk: - // } - // omp.yield + // omp.loopnest { + // omp.loopnest { ... + // ... store + // store ===> %v = arith.addi %iv, %step + // omp.yield %cmp = %step < 0 ? %v < %ub : %v > %ub + // } fir.if %cmp { + // fir.store %v to %loopIV + // ^%lpv_update_blk: // } + // omp.yield + // } // // Only generate the compare once in presence of multiple LastPrivate @@ -242,8 +244,8 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { mlir::Value iv = op->getRegion(0).front().getArguments()[0]; mlir::Value ub = - mlir::dyn_cast(op).getUpperBound()[0]; - mlir::Value step = mlir::dyn_cast(op).getStep()[0]; + mlir::cast(op).getUpperBound()[0]; + mlir::Value step = mlir::cast(op).getStep()[0]; // v = iv + step // cmp = step < 0 ? v < ub : v > ub diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h index 9f7301df07598..c22d1e966df90 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h @@ -24,6 +24,7 @@ namespace omp { class DataSharingProcessor { public: + // TODO Replace with PrivateClauseOps. /// Collects all the information needed for delayed privatization. This can be /// used by ops with data-sharing clauses to properly generate their regions /// (e.g. 
add region arguments) and map the original SSA values to their diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index e499e16c19e04..fdb8ef4977bc9 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -38,38 +38,6 @@ using namespace Fortran::lower::omp; -//===----------------------------------------------------------------------===// -// Code generation helper functions -//===----------------------------------------------------------------------===// - -static Fortran::lower::pft::Evaluation * -getCollapsedLoopEval(Fortran::lower::pft::Evaluation &eval, int collapseValue) { - // Return the Evaluation of the innermost collapsed loop, or the current one - // if there was no COLLAPSE. - if (collapseValue == 0) - return &eval; - - Fortran::lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation(); - for (int i = 1; i < collapseValue; i++) { - // The nested evaluations should be DoConstructs (i.e. they should form - // a loop nest). Each DoConstruct is a tuple . - assert(curEval->isA()); - curEval = &*std::next(curEval->getNestedEvaluations().begin()); - } - return curEval; -} - -static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter, - Fortran::lower::pft::Evaluation &eval, - int collapseValue = 0) { - Fortran::lower::pft::Evaluation *curEval = - getCollapsedLoopEval(eval, collapseValue); - - for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations()) - converter.genEval(e); -} - //===----------------------------------------------------------------------===// // HostClausesInsertionGuard //===----------------------------------------------------------------------===// @@ -139,6 +107,113 @@ class HostClausesInsertionGuard { } }; +//===----------------------------------------------------------------------===// +// OpWithBodyGenInfo +//===----------------------------------------------------------------------===// + +struct OpWithBodyGenInfo { + /// A type for a code-gen callback function. This takes as argument the op for + /// which the code is being generated and returns the arguments of the op's + /// region. + using GenOMPRegionEntryCBFn = + std::function( + mlir::Operation *)>; + + OpWithBodyGenInfo(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + mlir::Location loc, Fortran::lower::pft::Evaluation &eval) + : converter(converter), semaCtx(semaCtx), loc(loc), eval(eval) {} + + OpWithBodyGenInfo &setGenNested(bool value) { + genNested = value; + return *this; + } + + OpWithBodyGenInfo &setOuterCombined(bool value) { + outerCombined = value; + return *this; + } + + OpWithBodyGenInfo &setClauses(const Fortran::parser::OmpClauseList *value) { + clauses = value; + return *this; + } + + OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) { + dsp = value; + return *this; + } + + OpWithBodyGenInfo & + setReductions(llvm::ArrayRef symbols, + llvm::ArrayRef types) { + reductionSymbols = symbols; + reductionTypes = types; + return *this; + } + + OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) { + genRegionEntryCB = value; + return *this; + } + + /// [inout] converter to use for the clauses. + Fortran::lower::AbstractConverter &converter; + /// [in] Semantics context + Fortran::semantics::SemanticsContext &semaCtx; + /// [in] location in source code. + mlir::Location loc; + /// [in] current PFT node/evaluation. 
+ Fortran::lower::pft::Evaluation &eval; + /// [in] whether to generate FIR for nested evaluations + bool genNested = true; + /// [in] is this an outer operation - prevents privatization. + bool outerCombined = false; + /// [in] list of clauses to process. + const Fortran::parser::OmpClauseList *clauses = nullptr; + /// [in] if provided, processes the construct's data-sharing attributes. + DataSharingProcessor *dsp = nullptr; + /// [in] if provided, list of reduction symbols + llvm::ArrayRef reductionSymbols; + /// [in] if provided, list of reduction types + llvm::ArrayRef reductionTypes; + /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block + /// is created in the region. + GenOMPRegionEntryCBFn genRegionEntryCB = nullptr; +}; + +//===----------------------------------------------------------------------===// +// Code generation helper functions +//===----------------------------------------------------------------------===// + +static Fortran::lower::pft::Evaluation * +getCollapsedLoopEval(Fortran::lower::pft::Evaluation &eval, int collapseValue) { + // Return the Evaluation of the innermost collapsed loop, or the current one + // if there was no COLLAPSE. + if (collapseValue == 0) + return &eval; + + Fortran::lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation(); + for (int i = 1; i < collapseValue; i++) { + // The nested evaluations should be DoConstructs (i.e. they should form + // a loop nest). Each DoConstruct is a tuple . + assert(curEval->isA()); + curEval = &*std::next(curEval->getNestedEvaluations().begin()); + } + return curEval; +} + +static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter, + Fortran::lower::pft::Evaluation &eval, + int collapseValue = 0) { + Fortran::lower::pft::Evaluation *curEval = + getCollapsedLoopEval(eval, collapseValue); + + for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations()) + converter.genEval(e); +} + static fir::GlobalOp globalInitialization( Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder, const Fortran::semantics::Symbol &sym, @@ -282,268 +357,80 @@ static void threadPrivatizeVars(Fortran::lower::AbstractConverter &converter, firOpBuilder.restoreInsertionPoint(insPt); } -static mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, - std::size_t loopVarTypeSize) { - // OpenMP runtime requires 32-bit or 64-bit loop variables. - loopVarTypeSize = loopVarTypeSize * 8; - if (loopVarTypeSize < 32) { - loopVarTypeSize = 32; - } else if (loopVarTypeSize > 64) { - loopVarTypeSize = 64; - mlir::emitWarning(converter.getCurrentLocation(), - "OpenMP loop iteration variable cannot have more than 64 " - "bits size and will be narrowed into 64 bits."); - } - assert((loopVarTypeSize == 32 || loopVarTypeSize == 64) && - "OpenMP loop iteration variable size must be transformed into 32-bit " - "or 64-bit"); - return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); -} +/// Create the body (block) for an OpenMP Operation. +/// +/// \param [in] op - the operation the body belongs to. +/// \param [in] info - options controlling code-gen for the construction. 
+template +static void createBodyOfOp(Op &op, const OpWithBodyGenInfo &info) { + fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); -static mlir::Operation * -createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, - mlir::Location loc, mlir::Value indexVal, - const Fortran::semantics::Symbol *sym) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); - firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); + auto insertMarker = [](fir::FirOpBuilder &builder) { + mlir::Value undef = builder.create(builder.getUnknownLoc(), + builder.getIndexType()); + return undef.getDefiningOp(); + }; - mlir::Type tempTy = converter.genType(*sym); - mlir::Value temp = firOpBuilder.create( - loc, tempTy, /*pinned=*/true, /*lengthParams=*/mlir::ValueRange{}, - /*shapeParams*/ mlir::ValueRange{}, - llvm::ArrayRef{ - fir::getAdaptToByRefAttr(firOpBuilder)}); - converter.bindSymbol(*sym, temp); - firOpBuilder.restoreInsertionPoint(insPt); - mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal); - mlir::Operation *storeOp = firOpBuilder.create( - loc, cvtVal, converter.getSymbolAddress(*sym)); - return storeOp; -} + // If an argument for the region is provided then create the block with that + // argument. Also update the symbol's address with the mlir argument value. + // e.g. For loops the argument is the induction variable. And all further + // uses of the induction variable should use this mlir value. + auto regionArgs = + [&]() -> llvm::SmallVector { + if (info.genRegionEntryCB != nullptr) { + return info.genRegionEntryCB(op); + } -static mlir::Value -calculateTripCount(Fortran::lower::AbstractConverter &converter, - mlir::Location loc, llvm::ArrayRef lbs, - llvm::ArrayRef ubs, - llvm::ArrayRef steps) { - using namespace mlir::arith; - assert(lbs.size() == ubs.size() && lbs.size() == steps.size() && - !lbs.empty() && "Invalid bounds or step"); + firOpBuilder.createBlock(&op.getRegion()); + return {}; + }(); + // Mark the earliest insertion point. + mlir::Operation *marker = insertMarker(firOpBuilder); - fir::FirOpBuilder &b = converter.getFirOpBuilder(); + // If it is an unstructured region and is not the outer region of a combined + // construct, create empty blocks for all evaluations. + if (info.eval.lowerAsUnstructured() && !info.outerCombined) + Fortran::lower::createEmptyRegionBlocks( + firOpBuilder, info.eval.getNestedEvaluations()); - // Get the bit width of an integer-like type. - auto widthOf = [](mlir::Type ty) -> unsigned { - if (mlir::isa(ty)) { - return mlir::IndexType::kInternalStorageBitWidth; - } - if (auto tyInt = mlir::dyn_cast(ty)) { - return tyInt.getWidth(); - } - llvm_unreachable("Unexpected type"); - }; + // Start with privatization, so that the lowering of the nested + // code will use the right symbols. + // TODO Check that nothing broke from replacing WsLoopOp and SimdLoopOp here. + constexpr bool isLoop = std::is_same_v; + bool privatize = info.clauses && !info.outerCombined; - // For a type that is either IntegerType or IndexType, return the - // equivalent IntegerType. In the former case this is a no-op. 
- auto asIntTy = [&](mlir::Type ty) -> mlir::IntegerType { - if (ty.isIndex()) { - return mlir::IntegerType::get(ty.getContext(), widthOf(ty)); + firOpBuilder.setInsertionPoint(marker); + std::optional tempDsp; + if (privatize) { + if (!info.dsp) { + tempDsp.emplace(info.converter, *info.clauses, info.eval); + tempDsp->processStep1(); } - assert(ty.isIntOrIndex() && "Unexpected type"); - return mlir::cast(ty); - }; + } - // For two given values, establish a common signless IntegerType - // that can represent any value of type of x and of type of y, - // and return the pair of x, y converted to the new type. - auto unifyToSignless = - [&](fir::FirOpBuilder &b, mlir::Value x, - mlir::Value y) -> std::pair { - auto tyX = asIntTy(x.getType()), tyY = asIntTy(y.getType()); - unsigned width = std::max(widthOf(tyX), widthOf(tyY)); - auto wideTy = mlir::IntegerType::get(b.getContext(), width, - mlir::IntegerType::Signless); - return std::make_pair(b.createConvert(loc, wideTy, x), - b.createConvert(loc, wideTy, y)); - }; + if constexpr (std::is_same_v) { + threadPrivatizeVars(info.converter, info.eval); + if (info.clauses) { + firOpBuilder.setInsertionPoint(marker); + CopyinClauseOps clauseOps; + ClauseProcessor(info.converter, info.semaCtx, *info.clauses) + .processCopyin(clauseOps); + } + } - // Start with signless i32 by default. - auto tripCount = b.createIntegerConstant(loc, b.getI32Type(), 1); - - for (auto [origLb, origUb, origStep] : llvm::zip(lbs, ubs, steps)) { - auto tmpS0 = b.createIntegerConstant(loc, origStep.getType(), 0); - auto [step, step0] = unifyToSignless(b, origStep, tmpS0); - auto reverseCond = b.create(loc, CmpIPredicate::slt, step, step0); - auto negStep = b.create(loc, step0, step); - mlir::Value absStep = b.create(loc, reverseCond, negStep, step); - - auto [lb, ub] = unifyToSignless(b, origLb, origUb); - auto start = b.create(loc, reverseCond, ub, lb); - auto end = b.create(loc, reverseCond, lb, ub); - - mlir::Value range = b.create(loc, end, start); - auto rangeCond = b.create(loc, CmpIPredicate::slt, end, start); - std::tie(range, absStep) = unifyToSignless(b, range, absStep); - // numSteps = (range /u absStep) + 1 - auto numSteps = - b.create(loc, b.create(loc, range, absStep), - b.createIntegerConstant(loc, range.getType(), 1)); - - auto trip0 = b.createIntegerConstant(loc, numSteps.getType(), 0); - auto loopTripCount = b.create(loc, rangeCond, trip0, numSteps); - auto [totalTC, thisTC] = unifyToSignless(b, tripCount, loopTripCount); - tripCount = b.create(loc, totalTC, thisTC); - } - - return tripCount; -} - -struct OpWithBodyGenInfo { - /// A type for a code-gen callback function. This takes as argument the op for - /// which the code is being generated and returns the arguments of the op's - /// region. 
- using GenOMPRegionEntryCBFn = - std::function( - mlir::Operation *)>; - - OpWithBodyGenInfo(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - mlir::Location loc, Fortran::lower::pft::Evaluation &eval) - : converter(converter), semaCtx(semaCtx), loc(loc), eval(eval) {} - - OpWithBodyGenInfo &setGenNested(bool value) { - genNested = value; - return *this; - } - - OpWithBodyGenInfo &setOuterCombined(bool value) { - outerCombined = value; - return *this; - } - - OpWithBodyGenInfo &setClauses(const Fortran::parser::OmpClauseList *value) { - clauses = value; - return *this; - } - - OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) { - dsp = value; - return *this; - } - - OpWithBodyGenInfo & - setReductions(llvm::SmallVector *value1, - llvm::SmallVector *value2) { - reductionSymbols = value1; - reductionTypes = value2; - return *this; - } - - OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) { - genRegionEntryCB = value; - return *this; - } - - /// [inout] converter to use for the clauses. - Fortran::lower::AbstractConverter &converter; - /// [in] Semantics context - Fortran::semantics::SemanticsContext &semaCtx; - /// [in] location in source code. - mlir::Location loc; - /// [in] current PFT node/evaluation. - Fortran::lower::pft::Evaluation &eval; - /// [in] whether to generate FIR for nested evaluations - bool genNested = true; - /// [in] is this an outer operation - prevents privatization. - bool outerCombined = false; - /// [in] list of clauses to process. - const Fortran::parser::OmpClauseList *clauses = nullptr; - /// [in] if provided, processes the construct's data-sharing attributes. - DataSharingProcessor *dsp = nullptr; - /// [in] if provided, list of reduction symbols - llvm::SmallVector *reductionSymbols = - nullptr; - /// [in] if provided, list of reduction types - llvm::SmallVector *reductionTypes = nullptr; - /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block - /// is created in the region. - GenOMPRegionEntryCBFn genRegionEntryCB = nullptr; -}; - -/// Create the body (block) for an OpenMP Operation. -/// -/// \param [in] op - the operation the body belongs to. -/// \param [in] info - options controlling code-gen for the construction. -template -static void createBodyOfOp(Op &op, OpWithBodyGenInfo &info) { - fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); - - auto insertMarker = [](fir::FirOpBuilder &builder) { - mlir::Value undef = builder.create(builder.getUnknownLoc(), - builder.getIndexType()); - return undef.getDefiningOp(); - }; - - // If an argument for the region is provided then create the block with that - // argument. Also update the symbol's address with the mlir argument value. - // e.g. For loops the argument is the induction variable. And all further - // uses of the induction variable should use this mlir value. - auto regionArgs = - [&]() -> llvm::SmallVector { - if (info.genRegionEntryCB != nullptr) { - return info.genRegionEntryCB(op); - } - - firOpBuilder.createBlock(&op.getRegion()); - return {}; - }(); - // Mark the earliest insertion point. - mlir::Operation *marker = insertMarker(firOpBuilder); - - // If it is an unstructured region and is not the outer region of a combined - // construct, create empty blocks for all evaluations. 
- if (info.eval.lowerAsUnstructured() && !info.outerCombined) - Fortran::lower::createEmptyRegionBlocks( - firOpBuilder, info.eval.getNestedEvaluations()); - - // Start with privatization, so that the lowering of the nested - // code will use the right symbols. - constexpr bool isLoop = std::is_same_v || - std::is_same_v; - bool privatize = info.clauses && !info.outerCombined; - - firOpBuilder.setInsertionPoint(marker); - std::optional tempDsp; - if (privatize) { - if (!info.dsp) { - tempDsp.emplace(info.converter, *info.clauses, info.eval); - tempDsp->processStep1(); - } - } - - if constexpr (std::is_same_v) { - threadPrivatizeVars(info.converter, info.eval); - if (info.clauses) { - firOpBuilder.setInsertionPoint(marker); - ClauseProcessor(info.converter, info.semaCtx, *info.clauses) - .processCopyin(); - } - } - - if (info.genNested) { - // genFIR(Evaluation&) tries to patch up unterminated blocks, causing - // a lot of complications for our approach if the terminator generation - // is delayed past this point. Insert a temporary terminator here, then - // delete it. - firOpBuilder.setInsertionPointToEnd(&op.getRegion().back()); - auto *temp = Fortran::lower::genOpenMPTerminator( - firOpBuilder, op.getOperation(), info.loc); - firOpBuilder.setInsertionPointAfter(marker); - genNestedEvaluations(info.converter, info.eval); - temp->erase(); - } + if (info.genNested) { + // genFIR(Evaluation&) tries to patch up unterminated blocks, causing + // a lot of complications for our approach if the terminator generation + // is delayed past this point. Insert a temporary terminator here, then + // delete it. + firOpBuilder.setInsertionPointToEnd(&op.getRegion().back()); + auto *temp = Fortran::lower::genOpenMPTerminator( + firOpBuilder, op.getOperation(), info.loc); + firOpBuilder.setInsertionPointAfter(marker); + genNestedEvaluations(info.converter, info.eval); + temp->erase(); + } // Get or create a unique exiting block from the given region, or // return nullptr if there is no exiting block. 
@@ -672,453 +559,43 @@ static void genBodyOfTargetDataOp(
   genNestedEvaluations(converter, eval);
 }

-template <typename OpTy, typename... Args>
-static OpTy genOpWithBody(OpWithBodyGenInfo &info, Args &&...args) {
-  auto op = info.converter.getFirOpBuilder().create<OpTy>(
-      info.loc, std::forward<Args>(args)...);
-  createBodyOfOp<OpTy>(op, info);
-  return op;
-}
-
-static mlir::omp::MasterOp
-genMasterOp(Fortran::lower::AbstractConverter &converter,
-            Fortran::semantics::SemanticsContext &semaCtx,
-            Fortran::lower::pft::Evaluation &eval, bool genNested,
-            mlir::Location currentLocation) {
-  return genOpWithBody<mlir::omp::MasterOp>(
-      OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
-          .setGenNested(genNested),
-      /*resultTypes=*/mlir::TypeRange());
-}
-
-static mlir::omp::OrderedRegionOp
-genOrderedRegionOp(Fortran::lower::AbstractConverter &converter,
-                   Fortran::semantics::SemanticsContext &semaCtx,
-                   Fortran::lower::pft::Evaluation &eval, bool genNested,
-                   mlir::Location currentLocation) {
-  return genOpWithBody<mlir::omp::OrderedRegionOp>(
-      OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
-          .setGenNested(genNested),
-      /*simd=*/false);
-}
-
-static bool evalHasSiblings(Fortran::lower::pft::Evaluation &eval) {
-  return eval.parent.visit(Fortran::common::visitors{
-      [&](const Fortran::lower::pft::Program &parent) {
-        return parent.getUnits().size() + parent.getCommonBlocks().size() > 1;
-      },
-      [&](const Fortran::lower::pft::Evaluation &parent) {
-        for (auto &sibling : *parent.evaluationList)
-          if (&sibling != &eval && !sibling.isEndStmt())
-            return true;
-
-        return false;
-      },
-      [&](const auto &parent) {
-        for (auto &sibling : parent.evaluationList)
-          if (&sibling != &eval && !sibling.isEndStmt())
-            return true;
+// This function creates a block for the body of the targetOp's region. It adds
+// all the symbols present in mapSymbols as block arguments to this block.
+static void genBodyOfTargetOp(
+    Fortran::lower::AbstractConverter &converter,
+    Fortran::semantics::SemanticsContext &semaCtx,
+    Fortran::lower::pft::Evaluation &eval, bool genNested,
+    mlir::omp::TargetOp &targetOp,
+    const llvm::SmallVector<mlir::Type> &mapSymTypes,
+    const llvm::SmallVector<mlir::Location> &mapSymLocs,
+    const llvm::SmallVector<const Fortran::semantics::Symbol *> &mapSymbols,
+    const mlir::Location &currentLocation) {
+  assert(mapSymTypes.size() == mapSymLocs.size());
-      return false;
-    }});
-}
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  mlir::Region &region = targetOp.getRegion();
-static mlir::omp::ParallelOp
-genParallelOp(Fortran::lower::AbstractConverter &converter,
-              Fortran::lower::SymMap &symTable,
-              Fortran::semantics::SemanticsContext &semaCtx,
-              Fortran::lower::pft::Evaluation &eval, bool genNested,
-              mlir::Location currentLocation,
-              const Fortran::parser::OmpClauseList &clauseList,
-              bool outerCombined = false) {
-  Fortran::lower::StatementContext stmtCtx;
-  mlir::Value ifClauseOperand, numThreadsClauseOperand;
-  mlir::omp::ClauseProcBindKindAttr procBindKindAttr;
-  llvm::SmallVector<mlir::Value> allocateOperands, allocatorOperands,
-      reductionVars;
-  llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
-  llvm::SmallVector<const Fortran::semantics::Symbol *> reductionSymbols;
-
-  ClauseProcessor cp(converter, semaCtx, clauseList);
-  cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Parallel,
-               ifClauseOperand);
-  cp.processProcBind(procBindKindAttr);
-  cp.processDefault();
-  cp.processAllocate(allocatorOperands, allocateOperands);
-  if (!outerCombined)
-    cp.processReduction(currentLocation, reductionVars, reductionDeclSymbols,
-                        &reductionSymbols);
+  auto *regionBlock =
+      firOpBuilder.createBlock(&region, {}, mapSymTypes, mapSymLocs);
-  llvm::SmallVector<mlir::Type> reductionTypes;
-  reductionTypes.reserve(reductionVars.size());
-  llvm::transform(reductionVars, std::back_inserter(reductionTypes),
-                  [](mlir::Value v) { return v.getType(); });
+  // Clones the `bounds` placing them inside the target region and returns them.
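The cloneBound helper introduced in the following lines exists because the target region is isolated from above: SSA values computed on the host cannot be referenced directly inside it, so side-effect-free defining operations are re-created in the region instead. A stand-alone sketch of the same idea, with hypothetical names (only valid for pure ops whose operands, if any, are already visible in the block):

    // Re-materialize a value inside an isolated block instead of capturing it.
    static mlir::Value rematerialize(mlir::Value v, mlir::Block &block) {
      mlir::Operation *def = v.getDefiningOp();
      assert(def && mlir::isMemoryEffectFree(def) && "expected a pure defining op");
      mlir::Operation *clone = def->clone(); // operands are not remapped here
      block.push_back(clone);
      return clone->getResult(0);
    }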
+ auto cloneBound = [&](mlir::Value bound) { + if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { + mlir::Operation *clonedOp = bound.getDefiningOp()->clone(); + regionBlock->push_back(clonedOp); + return clonedOp->getResult(0); - auto reductionCallback = [&](mlir::Operation *op) { - llvm::SmallVector locs(reductionVars.size(), - currentLocation); - auto *block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {}, - reductionTypes, locs); - for (auto [arg, prv] : - llvm::zip_equal(reductionSymbols, block->getArguments())) { - converter.bindSymbol(*arg, prv); } - return reductionSymbols; + TODO(converter.getCurrentLocation(), + "target map clause operand unsupported bound type"); }; - auto offloadModOp = - llvm::cast(*converter.getModuleOp()); - mlir::omp::TargetOp targetOp = - findParentTargetOp(converter.getFirOpBuilder()); - - bool mustEvalOutsideTarget = - targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); - if (mustEvalOutsideTarget) { - HostClausesInsertionGuard guard(converter.getFirOpBuilder()); - cp.processNumThreads(stmtCtx, numThreadsClauseOperand); - } else { - cp.processNumThreads(stmtCtx, numThreadsClauseOperand); - } - - OpWithBodyGenInfo genInfo = - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setOuterCombined(outerCombined) - .setClauses(&clauseList) - .setReductions(&reductionSymbols, &reductionTypes) - .setGenRegionEntryCb(reductionCallback); - - if (!enableDelayedPrivatization) { - auto parallelOp = genOpWithBody( - genInfo, - /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, - numThreadsClauseOperand, allocateOperands, allocatorOperands, - reductionVars, - reductionDeclSymbols.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - reductionDeclSymbols), - procBindKindAttr, /*private_vars=*/llvm::SmallVector{}, - /*privatizers=*/nullptr); - - if (mustEvalOutsideTarget) { - if (numThreadsClauseOperand) - targetOp.getNumThreadsMutable().assign(numThreadsClauseOperand); - } else { - if (numThreadsClauseOperand) - parallelOp.getNumThreadsVarMutable().assign(numThreadsClauseOperand); - } - - return parallelOp; - } - - bool privatize = !outerCombined; - DataSharingProcessor dsp(converter, clauseList, eval, - /*useDelayedPrivatization=*/true, &symTable); - - if (privatize) - dsp.processStep1(); - - const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo(); - - auto genRegionEntryCB = [&](mlir::Operation *op) { - auto parallelOp = llvm::cast(op); - - llvm::SmallVector reductionLocs(reductionVars.size(), - currentLocation); - - mlir::OperandRange privateVars = parallelOp.getPrivateVars(); - mlir::Region ®ion = parallelOp.getRegion(); - - llvm::SmallVector privateVarTypes = reductionTypes; - privateVarTypes.reserve(privateVarTypes.size() + privateVars.size()); - llvm::transform(privateVars, std::back_inserter(privateVarTypes), - [](mlir::Value v) { return v.getType(); }); - - llvm::SmallVector privateVarLocs = reductionLocs; - privateVarLocs.reserve(privateVarLocs.size() + privateVars.size()); - llvm::transform(privateVars, std::back_inserter(privateVarLocs), - [](mlir::Value v) { return v.getLoc(); }); - - converter.getFirOpBuilder().createBlock(®ion, /*insertPt=*/{}, - privateVarTypes, privateVarLocs); - - llvm::SmallVector allSymbols = - reductionSymbols; - allSymbols.append(delayedPrivatizationInfo.symbols); - for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { - converter.bindSymbol(*arg, prv); - } - - return 
allSymbols; - }; - - // TODO Merge with the reduction CB. - genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); - - llvm::SmallVector privatizers( - delayedPrivatizationInfo.privatizers.begin(), - delayedPrivatizationInfo.privatizers.end()); - - auto parallelOp = genOpWithBody( - genInfo, - /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, - /*num_threads_var=*/nullptr, allocateOperands, allocatorOperands, - reductionVars, - reductionDeclSymbols.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - reductionDeclSymbols), - procBindKindAttr, delayedPrivatizationInfo.originalAddresses, - delayedPrivatizationInfo.privatizers.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - privatizers)); - - if (mustEvalOutsideTarget) { - if (numThreadsClauseOperand) - targetOp.getNumThreadsMutable().assign(numThreadsClauseOperand); - } else { - if (numThreadsClauseOperand) - parallelOp.getNumThreadsVarMutable().assign(numThreadsClauseOperand); - } - - return parallelOp; -} - -static mlir::omp::SectionOp -genSectionOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList §ionsClauseList) { - // Currently only private/firstprivate clause is handled, and - // all privatization is done within `omp.section` operations. - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(§ionsClauseList)); -} - -static mlir::omp::SingleOp -genSingleOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &beginClauseList, - const Fortran::parser::OmpClauseList &endClauseList) { - llvm::SmallVector allocateOperands, allocatorOperands; - llvm::SmallVector copyPrivateVars; - llvm::SmallVector copyPrivateFuncs; - mlir::UnitAttr nowaitAttr; - - ClauseProcessor cp(converter, semaCtx, beginClauseList); - cp.processAllocate(allocatorOperands, allocateOperands); - - ClauseProcessor ecp(converter, semaCtx, endClauseList); - ecp.processNowait(nowaitAttr); - ecp.processCopyPrivate(currentLocation, copyPrivateVars, copyPrivateFuncs); - - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(&beginClauseList), - allocateOperands, allocatorOperands, copyPrivateVars, - copyPrivateFuncs.empty() - ? 
nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - copyPrivateFuncs), - nowaitAttr); -} - -static mlir::omp::TaskOp -genTaskOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, finalClauseOperand, priorityClauseOperand; - mlir::UnitAttr untiedAttr, mergeableAttr; - llvm::SmallVector dependTypeOperands; - llvm::SmallVector allocateOperands, allocatorOperands, - dependOperands; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Task, - ifClauseOperand); - cp.processAllocate(allocatorOperands, allocateOperands); - cp.processDefault(); - cp.processFinal(stmtCtx, finalClauseOperand); - cp.processUntied(untiedAttr); - cp.processMergeable(mergeableAttr); - cp.processPriority(stmtCtx, priorityClauseOperand); - cp.processDepend(dependTypeOperands, dependOperands); - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_task); - - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(&clauseList), - ifClauseOperand, finalClauseOperand, untiedAttr, mergeableAttr, - /*in_reduction_vars=*/mlir::ValueRange(), - /*in_reductions=*/nullptr, priorityClauseOperand, - dependTypeOperands.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - dependTypeOperands), - dependOperands, allocateOperands, allocatorOperands); -} - -static mlir::omp::TaskGroupOp -genTaskGroupOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - llvm::SmallVector allocateOperands, allocatorOperands; - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processAllocate(allocatorOperands, allocateOperands); - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_taskgroup); - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(&clauseList), - /*task_reduction_vars=*/mlir::ValueRange(), - /*task_reductions=*/nullptr, allocateOperands, allocatorOperands); -} - -static mlir::omp::DataOp -genDataOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, deviceOperand; - llvm::SmallVector mapOperands, devicePtrOperands, - deviceAddrOperands; - llvm::SmallVector useDeviceTypes; - llvm::SmallVector useDeviceLocs; - llvm::SmallVector useDeviceSymbols; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetData, - ifClauseOperand); - cp.processDevice(stmtCtx, deviceOperand); - cp.processUseDevicePtr(devicePtrOperands, useDeviceTypes, useDeviceLocs, - useDeviceSymbols); - cp.processUseDeviceAddr(deviceAddrOperands, useDeviceTypes, useDeviceLocs, - useDeviceSymbols); - cp.processMap(currentLocation, llvm::omp::Directive::OMPD_target_data, - stmtCtx, mapOperands); - - auto dataOp = 
converter.getFirOpBuilder().create( - currentLocation, ifClauseOperand, deviceOperand, devicePtrOperands, - deviceAddrOperands, mapOperands); - genBodyOfTargetDataOp(converter, semaCtx, eval, genNested, dataOp, - useDeviceTypes, useDeviceLocs, useDeviceSymbols, - currentLocation); - return dataOp; -} - -template -static OpTy -genEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, deviceOperand; - mlir::UnitAttr nowaitAttr; - llvm::SmallVector mapOperands, dependOperands; - llvm::SmallVector dependTypeOperands; - - Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName; - // GCC 9.3.0 emits a (probably) bogus warning about an unused variable. - [[maybe_unused]] llvm::omp::Directive directive; - if constexpr (std::is_same_v) { - directiveName = - Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetEnterData; - directive = llvm::omp::Directive::OMPD_target_enter_data; - } else if constexpr (std::is_same_v) { - directiveName = - Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetExitData; - directive = llvm::omp::Directive::OMPD_target_exit_data; - } else if constexpr (std::is_same_v) { - directiveName = - Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetUpdate; - directive = llvm::omp::Directive::OMPD_target_update; - } else { - return nullptr; - } - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(directiveName, ifClauseOperand); - cp.processDevice(stmtCtx, deviceOperand); - cp.processDepend(dependTypeOperands, dependOperands); - cp.processNowait(nowaitAttr); - - if constexpr (std::is_same_v) { - cp.processMotionClauses(stmtCtx, - mapOperands); - cp.processMotionClauses(stmtCtx, - mapOperands); - - } else { - cp.processMap(currentLocation, directive, stmtCtx, mapOperands); - } - - return firOpBuilder.create( - currentLocation, ifClauseOperand, deviceOperand, - dependTypeOperands.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - dependTypeOperands), - dependOperands, nowaitAttr, mapOperands); -} - -// This functions creates a block for the body of the targetOp's region. It adds -// all the symbols present in mapSymbols as block arguments to this block. -static void genBodyOfTargetOp( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::omp::TargetOp &targetOp, - const llvm::SmallVector &mapSymTypes, - const llvm::SmallVector &mapSymLocs, - const llvm::SmallVector &mapSymbols, - const mlir::Location ¤tLocation) { - assert(mapSymTypes.size() == mapSymLocs.size()); - - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::Region ®ion = targetOp.getRegion(); - - auto *regionBlock = - firOpBuilder.createBlock(®ion, {}, mapSymTypes, mapSymLocs); - - // Clones the `bounds` placing them inside the target region and returns them. 
- auto cloneBound = [&](mlir::Value bound) { - if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { - mlir::Operation *clonedOp = bound.getDefiningOp()->clone(); - regionBlock->push_back(clonedOp); - return clonedOp->getResult(0); - } - TODO(converter.getCurrentLocation(), - "target map clause operand unsupported bound type"); - }; - - auto cloneBounds = [cloneBound](llvm::ArrayRef bounds) { - llvm::SmallVector clonedBounds; - for (mlir::Value bound : bounds) - clonedBounds.emplace_back(cloneBound(bound)); - return clonedBounds; - }; + auto cloneBounds = [cloneBound](llvm::ArrayRef bounds) { + llvm::SmallVector clonedBounds; + for (mlir::Value bound : bounds) + clonedBounds.emplace_back(cloneBound(bound)); + return clonedBounds; + }; // Bind the symbols to their corresponding block arguments. for (auto [argIndex, argSymbol] : llvm::enumerate(mapSymbols)) { @@ -1187,54 +664,1053 @@ static void genBodyOfTargetOp( firOpBuilder.createTemporary(val.getLoc(), val.getType()); firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); - llvm::SmallVector bounds; - std::stringstream name; - firOpBuilder.setInsertionPoint(targetOp); - mlir::Value mapOp = createMapInfoOp( - firOpBuilder, copyVal.getLoc(), copyVal, mlir::Value{}, name.str(), - bounds, llvm::SmallVector{}, - static_cast< - std::underlying_type_t>( - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), - mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); - targetOp.getMapOperandsMutable().append(mapOp); - mlir::Value clonedValArg = - region.addArgument(copyVal.getType(), copyVal.getLoc()); - firOpBuilder.setInsertionPointToStart(regionBlock); - auto loadOp = firOpBuilder.create(clonedValArg.getLoc(), - clonedValArg); - val.replaceUsesWithIf( - loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) { - return use.getOwner()->getBlock() == regionBlock; - }); - firOpBuilder.setInsertionPoint(regionBlock, savedIP); - } + llvm::SmallVector bounds; + std::stringstream name; + firOpBuilder.setInsertionPoint(targetOp); + mlir::Value mapOp = createMapInfoOp( + firOpBuilder, copyVal.getLoc(), copyVal, mlir::Value{}, name.str(), + bounds, llvm::SmallVector{}, + static_cast< + std::underlying_type_t>( + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), + mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); + targetOp.getMapOperandsMutable().append(mapOp); + mlir::Value clonedValArg = + region.addArgument(copyVal.getType(), copyVal.getLoc()); + firOpBuilder.setInsertionPointToStart(regionBlock); + auto loadOp = firOpBuilder.create(clonedValArg.getLoc(), + clonedValArg); + val.replaceUsesWithIf( + loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) { + return use.getOwner()->getBlock() == regionBlock; + }); + firOpBuilder.setInsertionPoint(regionBlock, savedIP); + } + } + valuesDefinedAbove.clear(); + mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); + } + + // Insert dummy instruction to remember the insertion position. The + // marker will be deleted since there are not uses. + // In the HLFIR flow there are hlfir.declares inserted above while + // setting block arguments. + mlir::Value undefMarker = firOpBuilder.create( + targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); + + // Create blocks for unstructured regions. This has to be done since + // blocks are initially allocated with the function as the parent region. 
+ if (eval.lowerAsUnstructured()) { + Fortran::lower::createEmptyRegionBlocks( + firOpBuilder, eval.getNestedEvaluations()); + } + + firOpBuilder.create(currentLocation); + + // Create the insertion point after the marker. + firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); + if (genNested) + genNestedEvaluations(converter, eval); +} + +template +static OpTy genOpWithBody(const OpWithBodyGenInfo &info, Args &&...args) { + auto op = info.converter.getFirOpBuilder().create( + info.loc, std::forward(args)...); + createBodyOfOp(op, info); + return op; +} + +static mlir::Value +calculateTripCount(Fortran::lower::AbstractConverter &converter, + mlir::Location loc, llvm::ArrayRef lbs, + llvm::ArrayRef ubs, + llvm::ArrayRef steps) { + using namespace mlir::arith; + assert(lbs.size() == ubs.size() && lbs.size() == steps.size() && + !lbs.empty() && "Invalid bounds or step"); + + fir::FirOpBuilder &b = converter.getFirOpBuilder(); + + // Get the bit width of an integer-like type. + auto widthOf = [](mlir::Type ty) -> unsigned { + if (mlir::isa(ty)) { + return mlir::IndexType::kInternalStorageBitWidth; + } + if (auto tyInt = mlir::dyn_cast(ty)) { + return tyInt.getWidth(); + } + llvm_unreachable("Unexpected type"); + }; + + // For a type that is either IntegerType or IndexType, return the + // equivalent IntegerType. In the former case this is a no-op. + auto asIntTy = [&](mlir::Type ty) -> mlir::IntegerType { + if (ty.isIndex()) { + return mlir::IntegerType::get(ty.getContext(), widthOf(ty)); + } + assert(ty.isIntOrIndex() && "Unexpected type"); + return mlir::cast(ty); + }; + + // For two given values, establish a common signless IntegerType + // that can represent any value of type of x and of type of y, + // and return the pair of x, y converted to the new type. + auto unifyToSignless = + [&](fir::FirOpBuilder &b, mlir::Value x, + mlir::Value y) -> std::pair { + auto tyX = asIntTy(x.getType()), tyY = asIntTy(y.getType()); + unsigned width = std::max(widthOf(tyX), widthOf(tyY)); + auto wideTy = mlir::IntegerType::get(b.getContext(), width, + mlir::IntegerType::Signless); + return std::make_pair(b.createConvert(loc, wideTy, x), + b.createConvert(loc, wideTy, y)); + }; + + // Start with signless i32 by default. 
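Each dimension handled by the loop that follows contributes (range /u |step|) + 1 iterations, with the bounds swapped when the step is negative and a count of zero for an empty range; the collapsed trip count is the product over all dimensions. A scalar model of that arithmetic as a hypothetical helper (illustrative only):

    #include <cstdint>

    // One dimension of the trip-count computation, on plain integers.
    static uint64_t dimTripCount(int64_t lb, int64_t ub, int64_t step) {
      uint64_t absStep = step < 0 ? uint64_t(-step) : uint64_t(step);
      int64_t start = step < 0 ? ub : lb;
      int64_t end = step < 0 ? lb : ub;
      if (end < start)
        return 0;                                 // empty iteration space
      return uint64_t(end - start) / absStep + 1; // unsigned divide, then +1
    }
    // e.g. dimTripCount(1, 10, 3) == 4; counts are multiplied across dimensions.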
+ auto tripCount = b.createIntegerConstant(loc, b.getI32Type(), 1); + + for (auto [origLb, origUb, origStep] : llvm::zip(lbs, ubs, steps)) { + auto tmpS0 = b.createIntegerConstant(loc, origStep.getType(), 0); + auto [step, step0] = unifyToSignless(b, origStep, tmpS0); + auto reverseCond = b.create(loc, CmpIPredicate::slt, step, step0); + auto negStep = b.create(loc, step0, step); + mlir::Value absStep = b.create(loc, reverseCond, negStep, step); + + auto [lb, ub] = unifyToSignless(b, origLb, origUb); + auto start = b.create(loc, reverseCond, ub, lb); + auto end = b.create(loc, reverseCond, lb, ub); + + mlir::Value range = b.create(loc, end, start); + auto rangeCond = b.create(loc, CmpIPredicate::slt, end, start); + std::tie(range, absStep) = unifyToSignless(b, range, absStep); + // numSteps = (range /u absStep) + 1 + auto numSteps = + b.create(loc, b.create(loc, range, absStep), + b.createIntegerConstant(loc, range.getType(), 1)); + + auto trip0 = b.createIntegerConstant(loc, numSteps.getType(), 0); + auto loopTripCount = b.create(loc, rangeCond, trip0, numSteps); + auto [totalTC, thisTC] = unifyToSignless(b, tripCount, loopTripCount); + tripCount = b.create(loc, totalTC, thisTC); + } + + return tripCount; +} + +static bool evalHasSiblings(Fortran::lower::pft::Evaluation &eval) { + return eval.parent.visit(Fortran::common::visitors{ + [&](const Fortran::lower::pft::Program &parent) { + return parent.getUnits().size() + parent.getCommonBlocks().size() > 1; + }, + [&](const Fortran::lower::pft::Evaluation &parent) { + for (auto &sibling : *parent.evaluationList) + if (&sibling != &eval && !sibling.isEndStmt()) + return true; + + return false; + }, + [&](const auto &parent) { + for (auto &sibling : parent.evaluationList) + if (&sibling != &eval && !sibling.isEndStmt()) + return true; + + return false; + }}); +} + +/// Extract the list of function and variable symbols affected by the given +/// 'declare target' directive and return the intended device type for them. 
+static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, + EnterLinkToClauseOps &enterLinkToClauseOps) { + + // The default capture type + DeviceTypeClauseOps deviceTypeClauseOps = { + mlir::omp::DeclareTargetDeviceType::any}; + const auto &spec = std::get( + declareTargetConstruct.t); + + if (const auto *objectList{ + Fortran::parser::Unwrap(spec.u)}) { + // Case: declare target(func, var1, var2) + gatherFuncAndVarSyms(*objectList, mlir::omp::DeclareTargetCaptureClause::to, + enterLinkToClauseOps.symbolAndClause); + } else if (const auto *clauseList{ + Fortran::parser::Unwrap( + spec.u)}) { + if (clauseList->v.empty()) { + // Case: declare target, implicit capture of function + enterLinkToClauseOps.symbolAndClause.emplace_back( + mlir::omp::DeclareTargetCaptureClause::to, + eval.getOwningProcedure()->getSubprogramSymbol()); + } + + ClauseProcessor cp(converter, semaCtx, *clauseList); + cp.processDeviceType(deviceTypeClauseOps); + cp.processEnter(enterLinkToClauseOps); + cp.processLink(enterLinkToClauseOps); + cp.processTo(enterLinkToClauseOps); + cp.processTODO( + converter.getCurrentLocation(), + llvm::omp::Directive::OMPD_declare_target); + } + + return deviceTypeClauseOps.deviceType; +} + +static void collectDeferredDeclareTargets( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, + llvm::SmallVectorImpl + &deferredDeclareTarget) { + EnterLinkToClauseOps clauseOps; + mlir::omp::DeclareTargetDeviceType devType = getDeclareTargetInfo( + converter, semaCtx, eval, declareTargetConstruct, clauseOps); + // Return the device type only if at least one of the targets for the + // directive is a function or subroutine + mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + + for (const DeclareTargetCapturePair &symClause : clauseOps.symbolAndClause) { + mlir::Operation *op = mod.lookupSymbol(converter.mangleName( + std::get(symClause))); + + if (!op) { + deferredDeclareTarget.push_back( + {std::get<0>(symClause), devType, std::get<1>(symClause)}); + } + } +} + +static std::optional +getDeclareTargetFunctionDevice( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct + &declareTargetConstruct) { + EnterLinkToClauseOps clauseOps; + mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo( + converter, semaCtx, eval, declareTargetConstruct, clauseOps); + + // Return the device type only if at least one of the targets for the + // directive is a function or subroutine + mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + for (const DeclareTargetCapturePair &symClause : clauseOps.symbolAndClause) { + mlir::Operation *op = mod.lookupSymbol(converter.mangleName( + std::get(symClause))); + + if (mlir::isa_and_nonnull(op)) + return deviceType; + } + + return std::nullopt; +} + +static mlir::Operation * +createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, + mlir::Location loc, mlir::Value indexVal, + const Fortran::semantics::Symbol *sym) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + 
mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); + firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); + + mlir::Type tempTy = converter.genType(*sym); + mlir::Value temp = firOpBuilder.create( + loc, tempTy, /*pinned=*/true, /*lengthParams=*/mlir::ValueRange{}, + /*shapeParams*/ mlir::ValueRange{}, + llvm::ArrayRef{ + fir::getAdaptToByRefAttr(firOpBuilder)}); + converter.bindSymbol(*sym, temp); + firOpBuilder.restoreInsertionPoint(insPt); + mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal); + mlir::Operation *storeOp = firOpBuilder.create( + loc, cvtVal, converter.getSymbolAddress(*sym)); + return storeOp; +} + +static void +genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, + mlir::Location &loc, + llvm::ArrayRef args) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + auto ®ion = op->getRegion(0); + + std::size_t loopVarTypeSize = 0; + for (const Fortran::semantics::Symbol *arg : args) + loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); + mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + llvm::SmallVector tiv(args.size(), loopVarType); + llvm::SmallVector locs(args.size(), loc); + firOpBuilder.createBlock(®ion, {}, tiv, locs); + // The argument is not currently in memory, so make a temporary for the + // argument, and store it there, then bind that location to the argument. + mlir::Operation *storeOp = nullptr; + for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { + mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex)); + storeOp = + createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); + } + firOpBuilder.setInsertionPointAfter(storeOp); +} + +static void genReductionVars( + mlir::Operation *op, Fortran::lower::AbstractConverter &converter, + mlir::Location &loc, + llvm::ArrayRef reductionArgs, + llvm::ArrayRef reductionTypes) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + llvm::SmallVector blockArgLocs(reductionArgs.size(), loc); + + mlir::Block *entryBlock = firOpBuilder.createBlock( + &op->getRegion(0), {}, reductionTypes, blockArgLocs); + + // Bind the reduction arguments to their block arguments + for (auto [arg, prv] : + llvm::zip_equal(reductionArgs, entryBlock->getArguments())) { + converter.bindSymbol(*arg, prv); + } +} + +//===----------------------------------------------------------------------===// +// Code generation functions for clauses +//===----------------------------------------------------------------------===// + +// TODO Try to compile, check privatization of simple wsloop/simdloop/distribute +// TODO Move common args and functions into a ConstructProcessor class + +static void +genCriticalDeclareClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + CriticalDeclareOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processHint(clauseOps); +} + +static void genDataClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, DataOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processDevice(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetData, + clauseOps); + 
cp.processMap(loc, stmtCtx, clauseOps); + cp.processUseDeviceAddr(clauseOps); + cp.processUseDevicePtr(clauseOps); +} + +static void genDistributeClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + DistributeOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_distribute); +} + +static void genEnterExitUpdateDataClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, mlir::Location loc, + Fortran::parser::OmpIfClause::DirectiveNameModifier directive, + EnterExitUpdateDataOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processDepend(clauseOps); + cp.processDevice(stmtCtx, clauseOps); + cp.processIf(directive, clauseOps); + cp.processNowait(clauseOps); + + if (directive == + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetUpdate) { + cp.processMotionClauses(stmtCtx, clauseOps); + cp.processMotionClauses(stmtCtx, + clauseOps); + } else { + cp.processMap(loc, stmtCtx, clauseOps); + } +} + +static void genFlushClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const std::optional &objects, + const std::optional> + &clauses, + mlir::Location loc, llvm::SmallVectorImpl &operandRange) { + if (objects) + genObjectList(*objects, converter, operandRange); + + if (clauses && clauses->size() > 0) + TODO(converter.getCurrentLocation(), "Handle OmpMemoryOrderClause"); +} + +static void genLoopNestClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + LoopNestOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processCollapse(loc, eval, clauseOps); +} + +static void +genOrderedRegionClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + OrderedRegionOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_ordered); +} + +static void genParallelClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, bool processReduction, + bool evalNumThreadsOutsideTarget, + ParallelOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processDefault(); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Parallel, + clauseOps); + cp.processProcBind(clauseOps); + + if (processReduction) + cp.processReduction(loc, clauseOps); + + if (evalNumThreadsOutsideTarget) { + HostClausesInsertionGuard guard(converter.getFirOpBuilder()); + cp.processNumThreads(stmtCtx, clauseOps); + } else { + cp.processNumThreads(stmtCtx, clauseOps); + } +} + +static void genSectionsClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + bool 
clausesFromBeginSections, + SectionsOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + if (clausesFromBeginSections) { + cp.processAllocate(clauseOps); + cp.processSectionsReduction(loc, clauseOps); + } else { + cp.processNowait(clauseOps); + } +} + +static void genSimdLoopClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + SimdLoopOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Simd, + clauseOps); + cp.processReduction(loc, clauseOps); + cp.processSafelen(clauseOps); + cp.processSimdlen(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_simd); +} + +static void genSingleClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &beginClauses, + const Fortran::parser::OmpClauseList &endClauses, + mlir::Location loc, SingleOpClauseOps &clauseOps) { + ClauseProcessor bcp(converter, semaCtx, beginClauses); + bcp.processAllocate(clauseOps); + + ClauseProcessor ecp(converter, semaCtx, endClauses); + ecp.processCopyprivate(loc, clauseOps); + ecp.processNowait(clauseOps); +} + +static void genTargetClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, bool processHostOnlyClauses, + bool processReduction, + TargetOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processDepend(clauseOps); + cp.processDevice(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Target, + clauseOps); + cp.processMap(loc, stmtCtx, clauseOps); + cp.processThreadLimit(stmtCtx, clauseOps); + + if (processHostOnlyClauses) + cp.processNowait(clauseOps); + + if (processReduction) + cp.processTargetReduction(clauseOps); + + cp.processTODO( + loc, llvm::omp::Directive::OMPD_target); +} + +static void genTaskClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, TaskOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processDefault(); + cp.processDepend(clauseOps); + cp.processFinal(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Task, + clauseOps); + cp.processMergeable(clauseOps); + cp.processPriority(stmtCtx, clauseOps); + cp.processUntied(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_task); +} + +static void genTaskGroupClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + TaskGroupOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_taskgroup); +} + +static void genTaskLoopClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + 
TaskLoopOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processDefault(); + cp.processFinal(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Taskloop, + clauseOps); + cp.processMergeable(clauseOps); + cp.processPriority(stmtCtx, clauseOps); + cp.processReduction(loc, clauseOps); + cp.processUntied(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_taskloop); +} + +static void genTaskWaitClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + TaskWaitOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_taskwait); +} + +static void genTeamsClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, bool evalNumTeamsOutsideTarget, + TeamsOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Teams, + clauseOps); + cp.processDefault(); + + if (evalNumTeamsOutsideTarget) { + HostClausesInsertionGuard guard(converter.getFirOpBuilder()); + cp.processNumTeams(stmtCtx, clauseOps); + cp.processThreadLimit(stmtCtx, clauseOps); + } else { + cp.processNumTeams(stmtCtx, clauseOps); + cp.processThreadLimit(stmtCtx, clauseOps); + } +} + +static void genWsLoopClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &beginClauses, + const Fortran::parser::OmpClauseList *endClauses, + mlir::Location loc, WsloopOpClauseOps &clauseOps) { + ClauseProcessor bcp(converter, semaCtx, beginClauses); + bcp.processOrdered(clauseOps); + bcp.processReduction(loc, clauseOps); + bcp.processSchedule(stmtCtx, clauseOps); + + if (endClauses) { + ClauseProcessor ecp(converter, semaCtx, *endClauses); + ecp.processNowait(clauseOps); + } + + bcp.processTODO( + loc, llvm::omp::Directive::OMPD_do); +} + +//===----------------------------------------------------------------------===// +// Code generation functions for leaf constructs +//===----------------------------------------------------------------------===// + +// TODO Pass OpClauseOps as arg to all genOp + +static mlir::omp::BarrierOp +genBarrierOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation) { + return converter.getFirOpBuilder().create( + currentLocation); +} + +static mlir::omp::CriticalOp +genCriticalOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + const std::optional &name) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + mlir::FlatSymbolRefAttr nameAttr; + + if (name.has_value()) { + CriticalDeclareOpClauseOps clauseOps; + genCriticalDeclareClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + std::string nameStr = name.value().ToString(); + mlir::ModuleOp module = firOpBuilder.getModule(); + auto 
global = module.lookupSymbol(nameStr); + if (!global) { + mlir::OpBuilder modBuilder(module.getBodyRegion()); + global = modBuilder.create( + currentLocation, firOpBuilder.getStringAttr(nameStr), + clauseOps.hintAttr); + } + nameAttr = mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(), + global.getSymName()); + } + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested), + nameAttr); +} + +static mlir::omp::DataOp +genDataOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + Fortran::lower::StatementContext stmtCtx; + DataOpClauseOps clauseOps; + genDataClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + clauseOps); + + auto dataOp = converter.getFirOpBuilder().create( + currentLocation, clauseOps.ifVar, clauseOps.deviceVar, + clauseOps.useDevicePtrVars, clauseOps.useDeviceAddrVars, + clauseOps.mapVars); + + genBodyOfTargetDataOp(converter, semaCtx, eval, genNested, dataOp, + clauseOps.useDeviceTypes, clauseOps.useDeviceLocs, + clauseOps.useDeviceSymbols, currentLocation); + return dataOp; +} + +static mlir::omp::DistributeOp +genDistributeOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + bool outerCombined = false) { + DistributeOpClauseOps clauseOps; + genDistributeClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(false) + .setOuterCombined(outerCombined) + .setClauses(&clauseList), + clauseOps.distScheduleStaticAttr, clauseOps.distScheduleChunkSizeVar, + clauseOps.allocateVars, clauseOps.allocatorVars, clauseOps.orderAttr, + isComposite ? converter.getFirOpBuilder().getUnitAttr() : nullptr); +} + +template +static OpTy +genEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + + Fortran::parser::OmpIfClause::DirectiveNameModifier directive; + if constexpr (std::is_same_v) { + directive = + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetEnterData; + } else if constexpr (std::is_same_v) { + directive = + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetExitData; + } else if constexpr (std::is_same_v) { + directive = + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetUpdate; + } else { + llvm_unreachable("Unexpected TARGET data construct"); + } + + EnterExitUpdateDataOpClauseOps clauseOps; + genEnterExitUpdateDataClauses(converter, semaCtx, stmtCtx, clauseList, + currentLocation, directive, clauseOps); + + return firOpBuilder.create( + currentLocation, clauseOps.ifVar, clauseOps.deviceVar, + clauseOps.dependTypeAttrs.empty() + ? 
nullptr + : firOpBuilder.getArrayAttr(clauseOps.dependTypeAttrs), + clauseOps.dependVars, clauseOps.nowaitAttr, clauseOps.mapVars); +} + +static mlir::omp::FlushOp +genFlushOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation, + const std::optional &objectList, + const std::optional> + &clauseList) { + llvm::SmallVector operandRange; + genFlushClauses(converter, semaCtx, objectList, clauseList, currentLocation, + operandRange); + + return converter.getFirOpBuilder().create( + converter.getCurrentLocation(), operandRange); +} + +static mlir::omp::LoopNestOp +genLoopNestOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + const LoopNestOpClauseOps &clauseOps, DataSharingProcessor &dsp) { + auto *nestedEval = + getCollapsedLoopEval(eval, Fortran::lower::getCollapseValue(clauseList)); + + auto ivCallback = [&](mlir::Operation *op) { + genLoopVars(op, converter, currentLocation, clauseOps.loopIV); + return clauseOps.loopIV; + }; + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setClauses(&clauseList) + .setDataSharingProcessor(&dsp) + .setGenRegionEntryCb(ivCallback) + .setGenNested(true), + clauseOps.loopLBVar, clauseOps.loopUBVar, clauseOps.loopStepVar, + /*inclusive=*/converter.getFirOpBuilder().getUnitAttr()); +} + +static mlir::omp::MasterOp +genMasterOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation) { + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested), + /*resultTypes=*/mlir::TypeRange()); +} + +static mlir::omp::OrderedRegionOp +genOrderedRegionOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + OrderedRegionOpClauseOps clauseOps; + genOrderedRegionClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + // TODO Store clauseOps.parLevelThreadsAttr in op. 
+ return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested), + clauseOps.parLevelSimdAttr); +} + +static mlir::omp::ParallelOp +genParallelOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + bool isComposite, mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + bool outerCombined = false) { + // TODO Distinguish between genParallelOp as block vs wrapper + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + ParallelOpClauseOps clauseOps; + clauseOps.reductionSymbols.emplace(); + + auto offloadModOp = + llvm::cast(*converter.getModuleOp()); + mlir::omp::TargetOp targetOp = findParentTargetOp(firOpBuilder); + + bool evalNumThreadsOutsideTarget = + targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); + + genParallelClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + /*processReduction=*/!outerCombined, + evalNumThreadsOutsideTarget, clauseOps); + + auto reductionCallback = [&](mlir::Operation *op) { + genReductionVars(op, converter, currentLocation, + *clauseOps.reductionSymbols, clauseOps.reductionTypes); + return *clauseOps.reductionSymbols; + }; + + OpWithBodyGenInfo genInfo = + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setOuterCombined(outerCombined) + .setClauses(&clauseList) + .setReductions(*clauseOps.reductionSymbols, clauseOps.reductionTypes) + .setGenRegionEntryCb(reductionCallback); + + if (!enableDelayedPrivatization) { + auto parallelOp = genOpWithBody( + genInfo, clauseOps.ifVar, /*num_threads_var=*/nullptr, + clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.procBindKindAttr, clauseOps.privateVars, + clauseOps.privatizers.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.privatizers), + isComposite ? firOpBuilder.getUnitAttr() : nullptr); + + if (clauseOps.numThreadsVar) { + if (evalNumThreadsOutsideTarget) + targetOp.getNumThreadsMutable().assign(clauseOps.numThreadsVar); + else + parallelOp.getNumThreadsVarMutable().assign(clauseOps.numThreadsVar); } - valuesDefinedAbove.clear(); - mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); + + return parallelOp; } - // Insert dummy instruction to remember the insertion position. The - // marker will be deleted since there are not uses. - // In the HLFIR flow there are hlfir.declares inserted above while - // setting block arguments. - mlir::Value undefMarker = firOpBuilder.create( - targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); + // TODO Integrate delayed privatization better with the new approach. + // - Store delayedPrivatizationInfo.{originalAddresses,privatizers} in + // clauseOps.{privateVars,privatizers}. + // - Outline genRegionEntryCB into composable genPrivatizedVars. + // - Refactor to create the omp.parallel op in a single place and possibly + // only use a single callback. + // - Check whether the external DataSharingProcessor could be used, and skip + // the call to processStep1() here. Perhaps also skip setting it in the + // OpWithBodyGenInfo structure. - // Create blocks for unstructured regions. 
This has to be done since - // blocks are initially allocated with the function as the parent region. - if (eval.lowerAsUnstructured()) { - Fortran::lower::createEmptyRegionBlocks( - firOpBuilder, eval.getNestedEvaluations()); + bool privatize = !outerCombined; + DataSharingProcessor dsp(converter, clauseList, eval, + /*useDelayedPrivatization=*/true, &symTable); + + if (privatize) + dsp.processStep1(); + + const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo(); + + auto genRegionEntryCB = [&](mlir::Operation *op) { + auto parallelOp = llvm::cast(op); + + llvm::SmallVector reductionLocs( + clauseOps.reductionVars.size(), currentLocation); + + mlir::OperandRange privateVars = parallelOp.getPrivateVars(); + mlir::Region ®ion = parallelOp.getRegion(); + + llvm::SmallVector privateVarTypes = clauseOps.reductionTypes; + privateVarTypes.reserve(privateVarTypes.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(privateVarTypes), + [](mlir::Value v) { return v.getType(); }); + + llvm::SmallVector privateVarLocs = reductionLocs; + privateVarLocs.reserve(privateVarLocs.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(privateVarLocs), + [](mlir::Value v) { return v.getLoc(); }); + + converter.getFirOpBuilder().createBlock(®ion, /*insertPt=*/{}, + privateVarTypes, privateVarLocs); + + llvm::SmallVector allSymbols = + *clauseOps.reductionSymbols; + allSymbols.append(delayedPrivatizationInfo.symbols); + for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { + converter.bindSymbol(*arg, prv); + } + + return allSymbols; + }; + + // TODO Merge with the reduction CB. + genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); + + llvm::SmallVector privatizers( + delayedPrivatizationInfo.privatizers.begin(), + delayedPrivatizationInfo.privatizers.end()); + + auto parallelOp = genOpWithBody( + genInfo, clauseOps.ifVar, /*num_threads_var=*/nullptr, + clauseOps.allocateVars, clauseOps.allocatorVars, clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.procBindKindAttr, delayedPrivatizationInfo.originalAddresses, + delayedPrivatizationInfo.privatizers.empty() + ? nullptr + : firOpBuilder.getArrayAttr(privatizers), + isComposite ? firOpBuilder.getUnitAttr() : nullptr); + + if (clauseOps.numThreadsVar) { + if (evalNumThreadsOutsideTarget) + targetOp.getNumThreadsMutable().assign(clauseOps.numThreadsVar); + else + parallelOp.getNumThreadsVarMutable().assign(clauseOps.numThreadsVar); } - firOpBuilder.create(currentLocation); + return parallelOp; +} - // Create the insertion point after the marker. - firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); - if (genNested) - genNestedEvaluations(converter, eval); +static mlir::omp::SectionOp +genSectionOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + // Currently only private/firstprivate clause is handled, and + // all privatization is done within `omp.section` operations. 
+ return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setClauses(&clauseList)); +} + +static mlir::omp::SectionsOp +genSectionsOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation, + const SectionsOpClauseOps &clauseOps) { + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(false), + /*reduction_vars=*/mlir::ValueRange(), /*reductions=*/nullptr, + clauseOps.allocateVars, clauseOps.allocatorVars, clauseOps.nowaitAttr); +} + +static mlir::omp::SimdLoopOp +genSimdLoopOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + SimdLoopOpClauseOps clauseOps; + genSimdLoopClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + clauseOps); + + auto *nestedEval = + getCollapsedLoopEval(eval, Fortran::lower::getCollapseValue(clauseList)); + + // TODO Create callback to add reduction vars as entry block arguments. + + // TODO Store clauseOps.reductionVars, clauseOps.reductionDeclSymbols in op. + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setGenNested(false), + /*result_types=*/mlir::TypeRange(), clauseOps.alignedVars, + clauseOps.alignmentAttrs.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.alignmentAttrs), + clauseOps.ifVar, clauseOps.nontemporalVars, clauseOps.orderAttr, + clauseOps.simdlenAttr, clauseOps.safelenAttr, + isComposite ? firOpBuilder.getUnitAttr() : nullptr); +} + +static mlir::omp::SingleOp +genSingleOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList &endClauseList) { + SingleOpClauseOps clauseOps; + genSingleClauses(converter, semaCtx, beginClauseList, endClauseList, + currentLocation, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setClauses(&beginClauseList), + clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.copyprivateVars, + clauseOps.copyprivateFuncs.empty() + ? 
nullptr + : converter.getFirOpBuilder().getArrayAttr( + clauseOps.copyprivateFuncs), + clauseOps.nowaitAttr); } static mlir::omp::TargetOp @@ -1243,50 +1719,29 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList, - llvm::omp::Directive directive, bool outerCombined = false) { + bool outerCombined = false) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, deviceOperand, threadLimitOperand; - mlir::UnitAttr nowaitAttr; - llvm::SmallVector dependTypeOperands; - llvm::SmallVector mapOperands, dependOperands; - llvm::SmallVector mapSymTypes; - llvm::SmallVector mapSymLocs; - llvm::SmallVector mapSymbols; - llvm::SmallVector reductionSymbols; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Target, - ifClauseOperand); - cp.processDevice(stmtCtx, deviceOperand); - cp.processThreadLimit(stmtCtx, threadLimitOperand); - cp.processDepend(dependTypeOperands, dependOperands); - cp.processMap(currentLocation, directive, stmtCtx, mapOperands, &mapSymTypes, - &mapSymLocs, &mapSymbols); - - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_target); - // Process host-only clauses. - if (!llvm::cast(*converter.getModuleOp()) - .getIsTargetDevice()) - cp.processNowait(nowaitAttr); + bool processHostOnlyClauses = + !llvm::cast(*converter.getModuleOp()) + .getIsTargetDevice(); - if (outerCombined) - cp.processTargetReduction(reductionSymbols); + TargetOpClauseOps clauseOps; + clauseOps.mapSymbols.emplace(); + clauseOps.mapSymLocs.emplace(); + clauseOps.mapSymTypes.emplace(); + genTargetClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + processHostOnlyClauses, /*processReduction=*/outerCombined, + clauseOps); // 5.8.1 Implicit Data-Mapping Attribute Rules // The following code follows the implicit data-mapping rules to map all the // symbols used inside the region that have not been explicitly mapped using // the map clause. 
auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) { - if (llvm::find(mapSymbols, &sym) == mapSymbols.end()) { + if (llvm::find(*clauseOps.mapSymbols, &sym) == + clauseOps.mapSymbols->end()) { mlir::Value baseOp = converter.getSymbolAddress(sym); if (!baseOp) if (const auto *details = sym.template detailsIf< @@ -1301,22 +1756,21 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym); name << sym.name().ToString(); - Fortran::lower::AddrAndBoundsInfo info = - getDataOperandBaseAddr(converter, converter.getFirOpBuilder(), sym, - converter.getCurrentLocation()); + Fortran::lower::AddrAndBoundsInfo info = getDataOperandBaseAddr( + converter, firOpBuilder, sym, converter.getCurrentLocation()); if (fir::unwrapRefType(info.addr.getType()).isa()) bounds = Fortran::lower::genBoundsOpsFromBox( - converter.getFirOpBuilder(), converter.getCurrentLocation(), - converter, dataExv, info); + firOpBuilder, converter.getCurrentLocation(), converter, + dataExv, info); if (fir::unwrapRefType(info.addr.getType()).isa()) { bool dataExvIsAssumedSize = Fortran::semantics::IsAssumedSizeArray(sym.GetUltimate()); bounds = Fortran::lower::genBaseBoundsOps( - converter.getFirOpBuilder(), converter.getCurrentLocation(), - converter, dataExv, dataExvIsAssumedSize); + firOpBuilder, converter.getCurrentLocation(), converter, dataExv, + dataExvIsAssumedSize); } llvm::omp::OpenMPOffloadMappingFlags mapFlag = @@ -1342,8 +1796,8 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; } - } else if (llvm::find(reductionSymbols, &sym) != - reductionSymbols.end()) { + } else if (llvm::find(clauseOps.targetReductionSymbols, &sym) != + clauseOps.targetReductionSymbols.end()) { // Do a tofrom map for reduction variables. mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; @@ -1355,39 +1809,160 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, } mlir::Value mapOp = createMapInfoOp( - converter.getFirOpBuilder(), baseOp.getLoc(), baseOp, mlir::Value{}, - name.str(), bounds, {}, + firOpBuilder, baseOp.getLoc(), baseOp, mlir::Value{}, name.str(), + bounds, {}, static_cast< std::underlying_type_t>( mapFlag), captureKind, baseOp.getType()); - mapOperands.push_back(mapOp); - mapSymTypes.push_back(baseOp.getType()); - mapSymLocs.push_back(baseOp.getLoc()); - mapSymbols.push_back(&sym); + clauseOps.mapVars.push_back(mapOp); + clauseOps.mapSymTypes->push_back(baseOp.getType()); + clauseOps.mapSymLocs->push_back(baseOp.getLoc()); + clauseOps.mapSymbols->push_back(&sym); } } }; Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap); - auto targetOp = converter.getFirOpBuilder().create( - currentLocation, ifClauseOperand, deviceOperand, threadLimitOperand, + auto targetOp = firOpBuilder.create( + currentLocation, clauseOps.ifVar, clauseOps.deviceVar, + clauseOps.threadLimitVar, /*trip_count=*/nullptr, - dependTypeOperands.empty() + clauseOps.dependTypeAttrs.empty() ? 
nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - dependTypeOperands), - dependOperands, nowaitAttr, mapOperands, + : firOpBuilder.getArrayAttr(clauseOps.dependTypeAttrs), + clauseOps.dependVars, clauseOps.nowaitAttr, clauseOps.mapVars, /*num_teams_lower=*/nullptr, /*num_teams_upper=*/nullptr, /*teams_thread_limit=*/nullptr, /*num_threads=*/nullptr); - genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSymTypes, - mapSymLocs, mapSymbols, currentLocation); + genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, + *clauseOps.mapSymTypes, *clauseOps.mapSymLocs, + *clauseOps.mapSymbols, currentLocation); return targetOp; } +static mlir::omp::TaskGroupOp +genTaskGroupOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + TaskGroupOpClauseOps clauseOps; + genTaskGroupClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + // TODO Possibly create callback to add task reduction vars as entry block + // arguments. + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setClauses(&clauseList), + clauseOps.taskReductionVars, + clauseOps.taskReductionDeclSymbols.empty() + ? nullptr + : converter.getFirOpBuilder().getArrayAttr( + clauseOps.taskReductionDeclSymbols), + clauseOps.allocateVars, clauseOps.allocatorVars); +} + +static mlir::omp::TaskLoopOp +genTaskLoopOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + TaskLoopOpClauseOps clauseOps; + clauseOps.reductionSymbols.emplace(); + genTaskLoopClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + clauseOps); + + auto *nestedEval = + getCollapsedLoopEval(eval, Fortran::lower::getCollapseValue(clauseList)); + + auto reductionCallback = [&](mlir::Operation *op) { + // TODO Possibly add in-reductions to the entry block argument list. + genReductionVars(op, converter, currentLocation, + *clauseOps.reductionSymbols, clauseOps.reductionTypes); + return *clauseOps.reductionSymbols; + }; + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setGenRegionEntryCb(reductionCallback) + .setReductions(*clauseOps.reductionSymbols, clauseOps.reductionTypes) + .setGenNested(false), + clauseOps.ifVar, clauseOps.finalVar, clauseOps.untiedAttr, + clauseOps.mergeableAttr, clauseOps.inReductionVars, + clauseOps.inReductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.inReductionDeclSymbols), + clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.priorityVar, clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.grainsizeVar, clauseOps.numTasksVar, clauseOps.nogroupAttr, + isComposite ? 
firOpBuilder.getUnitAttr() : nullptr);
+}
+
+static mlir::omp::TaskOp
+genTaskOp(Fortran::lower::AbstractConverter &converter,
+          Fortran::semantics::SemanticsContext &semaCtx,
+          Fortran::lower::pft::Evaluation &eval, bool genNested,
+          mlir::Location currentLocation,
+          const Fortran::parser::OmpClauseList &clauseList) {
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  Fortran::lower::StatementContext stmtCtx;
+  TaskOpClauseOps clauseOps;
+  genTaskClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation,
+                 clauseOps);
+
+  // TODO Possibly create callback to add in-reductions as entry block
+  // arguments.
+
+  return genOpWithBody<mlir::omp::TaskOp>(
+      OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval)
+          .setGenNested(genNested)
+          .setClauses(&clauseList),
+      clauseOps.ifVar, clauseOps.finalVar, clauseOps.untiedAttr,
+      clauseOps.mergeableAttr, clauseOps.inReductionVars,
+      clauseOps.inReductionDeclSymbols.empty()
+          ? nullptr
+          : firOpBuilder.getArrayAttr(clauseOps.inReductionDeclSymbols),
+      clauseOps.priorityVar,
+      clauseOps.dependTypeAttrs.empty()
+          ? nullptr
+          : firOpBuilder.getArrayAttr(clauseOps.dependTypeAttrs),
+      clauseOps.dependVars, clauseOps.allocateVars, clauseOps.allocatorVars);
+}
+
+static mlir::omp::TaskWaitOp
+genTaskWaitOp(Fortran::lower::AbstractConverter &converter,
+              Fortran::semantics::SemanticsContext &semaCtx,
+              Fortran::lower::pft::Evaluation &eval,
+              mlir::Location currentLocation,
+              const Fortran::parser::OmpClauseList &clauseList) {
+  TaskWaitOpClauseOps clauseOps;
+  genTaskWaitClauses(converter, semaCtx, clauseList, currentLocation,
+                     clauseOps);
+  return converter.getFirOpBuilder().create<mlir::omp::TaskWaitOp>(
+      currentLocation);
+}
+
+static mlir::omp::TaskYieldOp
+genTaskYieldOp(Fortran::lower::AbstractConverter &converter,
+               Fortran::semantics::SemanticsContext &semaCtx,
+               Fortran::lower::pft::Evaluation &eval,
+               mlir::Location currentLocation) {
+  return converter.getFirOpBuilder().create<mlir::omp::TaskYieldOp>(
+      currentLocation);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(Fortran::lower::AbstractConverter &converter,
            Fortran::semantics::SemanticsContext &semaCtx,
@@ -1395,192 +1970,233 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter,
            mlir::Location currentLocation,
            const Fortran::parser::OmpClauseList &clauseList,
            bool outerCombined = false) {
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   Fortran::lower::StatementContext stmtCtx;
-  mlir::Value numTeamsClauseOperand, ifClauseOperand, threadLimitClauseOperand;
-  llvm::SmallVector<mlir::Value> allocateOperands, allocatorOperands,
-      reductionVars;
-  llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
-
-  ClauseProcessor cp(converter, semaCtx, clauseList);
-  cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Teams,
-               ifClauseOperand);
-  cp.processAllocate(allocatorOperands, allocateOperands);
-  cp.processDefault();
-  // cp.processTODO(
-  //     currentLocation, llvm::omp::Directive::OMPD_teams);
+  TeamsOpClauseOps clauseOps;
 
-  // Evaluate NUM_TEAMS and THREAD_LIMIT on the host device, if currently inside
-  // of an omp.target operation.
auto offloadModOp = llvm::cast( converter.getModuleOp().getOperation()); - mlir::omp::TargetOp targetOp = - findParentTargetOp(converter.getFirOpBuilder()); + mlir::omp::TargetOp targetOp = findParentTargetOp(firOpBuilder); - bool mustEvalOutsideTarget = targetOp && !offloadModOp.getIsTargetDevice(); - if (mustEvalOutsideTarget) { - HostClausesInsertionGuard guard(converter.getFirOpBuilder()); - cp.processNumTeams(stmtCtx, numTeamsClauseOperand); - cp.processThreadLimit(stmtCtx, threadLimitClauseOperand); - } else { - cp.processNumTeams(stmtCtx, numTeamsClauseOperand); - cp.processThreadLimit(stmtCtx, threadLimitClauseOperand); - } + bool evalNumTeamsOutsideTarget = + targetOp && !offloadModOp.getIsTargetDevice(); + + genTeamsClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + evalNumTeamsOutsideTarget, clauseOps); + + // TODO Possibly create callback to add reductions as entry block arguments. auto teamsOp = genOpWithBody( OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) .setGenNested(genNested) .setOuterCombined(outerCombined) .setClauses(&clauseList), - /*num_teams_lower=*/nullptr, /*num_teams_upper=*/nullptr, ifClauseOperand, - /*thread_limit=*/nullptr, allocateOperands, allocatorOperands, - reductionVars, - reductionDeclSymbols.empty() + /*num_teams_lower=*/nullptr, /*num_teams_upper=*/nullptr, clauseOps.ifVar, + /*thread_limit=*/nullptr, clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - reductionDeclSymbols)); - if (mustEvalOutsideTarget) { - if (numTeamsClauseOperand) - targetOp.getNumTeamsUpperMutable().assign(numTeamsClauseOperand); - if (threadLimitClauseOperand) - targetOp.getTeamsThreadLimitMutable().assign(threadLimitClauseOperand); + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols)); + + // TODO Populate lower bound once supported by the clause processor + if (evalNumTeamsOutsideTarget) { + if (clauseOps.numTeamsUpperVar) + targetOp.getNumTeamsUpperMutable().assign(clauseOps.numTeamsUpperVar); + if (clauseOps.threadLimitVar) + targetOp.getTeamsThreadLimitMutable().assign(clauseOps.threadLimitVar); } else { - if (numTeamsClauseOperand) - teamsOp.getNumTeamsUpperMutable().assign(numTeamsClauseOperand); - if (threadLimitClauseOperand) - teamsOp.getThreadLimitMutable().assign(threadLimitClauseOperand); + if (clauseOps.numTeamsUpperVar) + teamsOp.getNumTeamsUpperMutable().assign(clauseOps.numTeamsUpperVar); + if (clauseOps.threadLimitVar) + teamsOp.getThreadLimitMutable().assign(clauseOps.threadLimitVar); } return teamsOp; } -static mlir::omp::DistributeOp -genDistributeOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList, - bool outerCombined = false) { - // TODO Process clauses - // ClauseProcessor cp(converter, clauseList); - // cp.processAllocate(allocatorOperands, allocateOperands); - // ... 
- - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setOuterCombined(outerCombined) - .setClauses(&clauseList), - /*dist_schedule_static=*/nullptr, - /*chunk_size=*/nullptr, - /*allocate_vars=*/mlir::ValueRange(), - /*allocators_vars=*/mlir::ValueRange(), - /*order_val=*/nullptr); -} - -/// Extract the list of function and variable symbols affected by the given -/// 'declare target' directive and return the intended device type for them. -static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, - llvm::SmallVectorImpl &symbolAndClause) { - - // The default capture type - mlir::omp::DeclareTargetDeviceType deviceType = - mlir::omp::DeclareTargetDeviceType::any; - const auto &spec = std::get( - declareTargetConstruct.t); +static mlir::omp::WsLoopOp +genWsLoopOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + WsloopOpClauseOps clauseOps; + clauseOps.reductionSymbols.emplace(); + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, clauseOps); - if (const auto *objectList{ - Fortran::parser::Unwrap(spec.u)}) { - // Case: declare target(func, var1, var2) - gatherFuncAndVarSyms(*objectList, mlir::omp::DeclareTargetCaptureClause::to, - symbolAndClause); - } else if (const auto *clauseList{ - Fortran::parser::Unwrap( - spec.u)}) { - if (clauseList->v.empty()) { - // Case: declare target, implicit capture of function - symbolAndClause.emplace_back( - mlir::omp::DeclareTargetCaptureClause::to, - eval.getOwningProcedure()->getSubprogramSymbol()); - } + auto *nestedEval = getCollapsedLoopEval( + eval, Fortran::lower::getCollapseValue(beginClauseList)); - ClauseProcessor cp(converter, semaCtx, *clauseList); - cp.processTo(symbolAndClause); - cp.processEnter(symbolAndClause); - cp.processLink(symbolAndClause); - cp.processDeviceType(deviceType); - cp.processTODO( - converter.getCurrentLocation(), - llvm::omp::Directive::OMPD_declare_target); - } + auto reductionCallback = [&](mlir::Operation *op) { + genReductionVars(op, converter, currentLocation, + *clauseOps.reductionSymbols, clauseOps.reductionTypes); + return *clauseOps.reductionSymbols; + }; - return deviceType; + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setReductions(*clauseOps.reductionSymbols, clauseOps.reductionTypes) + .setGenRegionEntryCb(reductionCallback) + .setGenNested(false), + clauseOps.linearVars, clauseOps.linearStepVars, clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.scheduleValAttr, clauseOps.scheduleChunkVar, + clauseOps.scheduleModAttr, clauseOps.scheduleSimdAttr, + clauseOps.nowaitAttr, clauseOps.orderedAttr, clauseOps.orderAttr, + isComposite ? 
firOpBuilder.getUnitAttr() : nullptr); } -static void collectDeferredDeclareTargets( +//===----------------------------------------------------------------------===// +// Code generation functions for composite constructs +//===----------------------------------------------------------------------===// + +static void genCompositeDistributeParallelDo( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, - llvm::SmallVectorImpl - &deferredDeclareTarget) { - llvm::SmallVector symbolAndClause; - mlir::omp::DeclareTargetDeviceType devType = getDeclareTargetInfo( - converter, semaCtx, eval, declareTargetConstruct, symbolAndClause); - // Return the device type only if at least one of the targets for the - // directive is a function or subroutine - mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); - - for (const DeclareTargetCapturePair &symClause : symbolAndClause) { - mlir::Operation *op = mod.lookupSymbol(converter.mangleName( - std::get(symClause))); - - if (!op) { - deferredDeclareTarget.push_back( - {std::get<0>(symClause), devType, std::get<1>(symClause)}); - } - } -} - -static std::optional -getDeclareTargetFunctionDevice( + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + DistributeOpClauseOps distributeClauseOps; + ParallelOpClauseOps parallelClauseOps; + WsloopOpClauseOps wsLoopClauseOps; + + genDistributeClauses(converter, semaCtx, beginClauseList, currentLocation, + distributeClauseOps); + // TODO evalNumThreadsOutsideTarget + genParallelClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, /*processReduction=*/true, + /*evalNumThreadsOutsideTarget=*/true, parallelClauseOps); + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, wsLoopClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genDistributeOp(); + // genParallelOp(); + // genWsLoopOp(); + TODO(currentLocation, "Composite DISTRIBUTE PARALLEL DO not implemented"); +} + +static void genCompositeDistributeParallelDoSimd( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPDeclareTargetConstruct - &declareTargetConstruct) { - llvm::SmallVector symbolAndClause; - mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo( - converter, semaCtx, eval, declareTargetConstruct, symbolAndClause); + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + DistributeOpClauseOps distributeClauseOps; + ParallelOpClauseOps parallelClauseOps; + WsloopOpClauseOps wsLoopClauseOps; + SimdLoopOpClauseOps simdClauseOps; + + genDistributeClauses(converter, semaCtx, beginClauseList, currentLocation, + distributeClauseOps); + // TODO evalNumThreadsOutsideTarget + genParallelClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, /*processReduction=*/true, + /*evalNumThreadsOutsideTarget=*/true, parallelClauseOps); + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, wsLoopClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, 
beginClauseList, + currentLocation, simdClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genDistributeOp(); + // genParallelOp(); + // genWsloopOp(); + // genSimdLoopOp(); + TODO(currentLocation, + "Composite DISTRIBUTE PARALLEL DO SIMD not implemented"); +} + +static void genCompositeDistributeSimd( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + DistributeOpClauseOps distributeClauseOps; + SimdLoopOpClauseOps simdClauseOps; - // Return the device type only if at least one of the targets for the - // directive is a function or subroutine - mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); - for (const DeclareTargetCapturePair &symClause : symbolAndClause) { - mlir::Operation *op = mod.lookupSymbol(converter.mangleName( - std::get(symClause))); + genDistributeClauses(converter, semaCtx, beginClauseList, currentLocation, + distributeClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, simdClauseOps); - if (mlir::isa_and_nonnull(op)) - return deviceType; - } + // TODO Pass clauseOps structures to generate wrappers + // genDistributeOp(); + // genSimdLoopOp(); + TODO(currentLocation, "Composite DISTRIBUTE SIMD not implemented"); +} - return std::nullopt; +static void +genCompositeDoSimd(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + WsloopOpClauseOps wsLoopClauseOps; + SimdLoopOpClauseOps simdClauseOps; + + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, wsLoopClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, simdClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genWsloopOp(); + // genSimdLoopOp(); + TODO(currentLocation, "Composite DO SIMD not implemented"); } static void -genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - const Fortran::parser::OpenMPSimpleStandaloneConstruct - &simpleStandaloneConstruct) { +genCompositeTaskLoopSimd(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + TaskLoopOpClauseOps taskLoopClauseOps; + SimdLoopOpClauseOps simdClauseOps; + + genTaskLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, taskLoopClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, simdClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genTaskloopOp(); + // genSimdLoopOp(); + TODO(currentLocation, "Composite TASKLOOP SIMD not implemented"); +} + +//===----------------------------------------------------------------------===// +// genOMP() Code 
generation functions +//===----------------------------------------------------------------------===// + +static void genOMP(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPSimpleStandaloneConstruct + &simpleStandaloneConstruct) { const auto &directive = std::get( simpleStandaloneConstruct.t); - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - const auto &opClauseList = + const auto &clauseList = std::get(simpleStandaloneConstruct.t); mlir::Location currentLocation = converter.genLocation(directive.source); @@ -1588,33 +2204,29 @@ genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter, default: break; case llvm::omp::Directive::OMPD_barrier: - firOpBuilder.create(currentLocation); + genBarrierOp(converter, semaCtx, eval, currentLocation); break; case llvm::omp::Directive::OMPD_taskwait: - ClauseProcessor(converter, semaCtx, opClauseList) - .processTODO( - currentLocation, llvm::omp::Directive::OMPD_taskwait); - firOpBuilder.create(currentLocation); + genTaskWaitOp(converter, semaCtx, eval, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_taskyield: - firOpBuilder.create(currentLocation); + genTaskYieldOp(converter, semaCtx, eval, currentLocation); break; case llvm::omp::Directive::OMPD_target_data: - genDataOp(converter, semaCtx, eval, genNested, currentLocation, - opClauseList); + genDataOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, + clauseList); break; case llvm::omp::Directive::OMPD_target_enter_data: genEnterExitUpdateDataOp( - converter, semaCtx, currentLocation, opClauseList); + converter, semaCtx, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_target_exit_data: genEnterExitUpdateDataOp( - converter, semaCtx, currentLocation, opClauseList); + converter, semaCtx, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_target_update: genEnterExitUpdateDataOp( - converter, semaCtx, currentLocation, opClauseList); + converter, semaCtx, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_ordered: TODO(currentLocation, "OMPD_ordered"); @@ -1622,28 +2234,21 @@ genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter, } static void -genOmpFlush(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { - llvm::SmallVector operandRange; - if (const auto &ompObjectList = - std::get>( - flushConstruct.t)) - genObjectList(*ompObjectList, converter, operandRange); - const auto &memOrderClause = +genOMP(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { + const auto &verbatim = std::get(flushConstruct.t); + const auto &objectList = + std::get>(flushConstruct.t); + const auto &clauseList = std::get>>( flushConstruct.t); - if (memOrderClause && memOrderClause->size() > 0) - TODO(converter.getCurrentLocation(), "Handle OmpMemoryOrderClause"); - converter.getFirOpBuilder().create( - converter.getCurrentLocation(), operandRange); + mlir::Location currentLocation = converter.genLocation(verbatim.source); + genFlushOp(converter, semaCtx, eval, currentLocation, objectList, clauseList); } 
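The genFlushOp helper called just above is presumably defined in an earlier hunk of this patch and is not visible here. As a rough, non-authoritative sketch (signature, parameter names, and return type are assumptions, modeled on the genOmpFlush body removed above), it would take roughly this shape:

static mlir::omp::FlushOp
genFlushOp(Fortran::lower::AbstractConverter &converter,
           Fortran::semantics::SemanticsContext &semaCtx,
           Fortran::lower::pft::Evaluation &eval,
           mlir::Location currentLocation,
           const std::optional<Fortran::parser::OmpObjectList> &objectList,
           const std::optional<std::list<Fortran::parser::OmpMemoryOrderClause>>
               &clauseList) {
  // Lower the flush-set (if any) to SSA values, as the removed genOmpFlush did.
  llvm::SmallVector<mlir::Value> operandRange;
  if (objectList)
    genObjectList(*objectList, converter, operandRange);
  // Memory-order clauses on FLUSH remain unsupported at this point.
  if (clauseList && clauseList->size() > 0)
    TODO(currentLocation, "Handle OmpMemoryOrderClause");
  return converter.getFirOpBuilder().create<mlir::omp::FlushOp>(
      currentLocation, operandRange);
}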
-//===----------------------------------------------------------------------===// -// genOMP() Code generation helper functions -//===----------------------------------------------------------------------===// - static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, @@ -1654,12 +2259,11 @@ genOMP(Fortran::lower::AbstractConverter &converter, Fortran::common::visitors{ [&](const Fortran::parser::OpenMPSimpleStandaloneConstruct &simpleStandaloneConstruct) { - genOmpSimpleStandalone(converter, semaCtx, eval, - /*genNested=*/true, - simpleStandaloneConstruct); + genOMP(converter, symTable, semaCtx, eval, + simpleStandaloneConstruct); }, [&](const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { - genOmpFlush(converter, semaCtx, eval, flushConstruct); + genOMP(converter, symTable, semaCtx, eval, flushConstruct); }, [&](const Fortran::parser::OpenMPCancelConstruct &cancelConstruct) { TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct"); @@ -1672,300 +2276,6 @@ genOMP(Fortran::lower::AbstractConverter &converter, standaloneConstruct.u); } -static void convertLoopBounds(Fortran::lower::AbstractConverter &converter, - mlir::Location loc, - llvm::SmallVectorImpl &lowerBound, - llvm::SmallVectorImpl &upperBound, - llvm::SmallVectorImpl &step, - std::size_t loopVarTypeSize) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - // The types of lower bound, upper bound, and step are converted into the - // type of the loop variable if necessary. - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - for (unsigned it = 0; it < (unsigned)lowerBound.size(); it++) { - lowerBound[it] = - firOpBuilder.createConvert(loc, loopVarType, lowerBound[it]); - upperBound[it] = - firOpBuilder.createConvert(loc, loopVarType, upperBound[it]); - step[it] = firOpBuilder.createConvert(loc, loopVarType, step[it]); - } -} - -static llvm::SmallVector -genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, - mlir::Location &loc, - const llvm::SmallVector &args) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - auto ®ion = op->getRegion(0); - - std::size_t loopVarTypeSize = 0; - for (const Fortran::semantics::Symbol *arg : args) - loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - llvm::SmallVector tiv(args.size(), loopVarType); - llvm::SmallVector locs(args.size(), loc); - firOpBuilder.createBlock(®ion, {}, tiv, locs); - // The argument is not currently in memory, so make a temporary for the - // argument, and store it there, then bind that location to the argument. 
- mlir::Operation *storeOp = nullptr; - for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { - mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex)); - storeOp = - createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); - } - firOpBuilder.setInsertionPointAfter(storeOp); - - return args; -} - -static llvm::SmallVector -genLoopAndReductionVars( - mlir::Operation *op, Fortran::lower::AbstractConverter &converter, - mlir::Location &loc, - const llvm::SmallVector &loopArgs, - const llvm::SmallVector &reductionArgs, - llvm::SmallVector &reductionTypes) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - - llvm::SmallVector blockArgTypes; - llvm::SmallVector blockArgLocs; - blockArgTypes.reserve(loopArgs.size() + reductionArgs.size()); - blockArgLocs.reserve(blockArgTypes.size()); - mlir::Block *entryBlock; - - if (loopArgs.size()) { - std::size_t loopVarTypeSize = 0; - for (const Fortran::semantics::Symbol *arg : loopArgs) - loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - std::fill_n(std::back_inserter(blockArgTypes), loopArgs.size(), - loopVarType); - std::fill_n(std::back_inserter(blockArgLocs), loopArgs.size(), loc); - } - if (reductionArgs.size()) { - llvm::copy(reductionTypes, std::back_inserter(blockArgTypes)); - std::fill_n(std::back_inserter(blockArgLocs), reductionArgs.size(), loc); - } - entryBlock = firOpBuilder.createBlock(&op->getRegion(0), {}, blockArgTypes, - blockArgLocs); - // The argument is not currently in memory, so make a temporary for the - // argument, and store it there, then bind that location to the argument. - if (loopArgs.size()) { - mlir::Operation *storeOp = nullptr; - for (auto [argIndex, argSymbol] : llvm::enumerate(loopArgs)) { - mlir::Value indexVal = - fir::getBase(op->getRegion(0).front().getArgument(argIndex)); - storeOp = - createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); - } - firOpBuilder.setInsertionPointAfter(storeOp); - } - // Bind the reduction arguments to their block arguments - for (auto [arg, prv] : llvm::zip_equal( - reductionArgs, - llvm::drop_begin(entryBlock->getArguments(), loopArgs.size()))) { - converter.bindSymbol(*arg, prv); - } - - return loopArgs; -} - -static void -createSimdLoop(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - llvm::omp::Directive ompDirective, - const Fortran::parser::OmpClauseList &loopOpClauseList, - mlir::Location loc) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - DataSharingProcessor dsp(converter, loopOpClauseList, eval); - dsp.processStep1(); - - Fortran::lower::StatementContext stmtCtx; - mlir::Value scheduleChunkClauseOperand, ifClauseOperand; - llvm::SmallVector lowerBound, upperBound, step, reductionVars; - llvm::SmallVector alignedVars, nontemporalVars; - llvm::SmallVector iv; - llvm::SmallVector reductionDeclSymbols; - mlir::omp::ClauseOrderKindAttr orderClauseOperand; - mlir::IntegerAttr simdlenClauseOperand, safelenClauseOperand; - std::size_t loopVarTypeSize; - - ClauseProcessor cp(converter, semaCtx, loopOpClauseList); - cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv, - loopVarTypeSize); - cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand); - cp.processReduction(loc, reductionVars, reductionDeclSymbols); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Simd, - ifClauseOperand); 
- cp.processSimdlen(simdlenClauseOperand); - cp.processSafelen(safelenClauseOperand); - cp.processTODO(loc, ompDirective); - - convertLoopBounds(converter, loc, lowerBound, upperBound, step, - loopVarTypeSize); - - mlir::TypeRange resultType; - auto simdLoopOp = firOpBuilder.create( - loc, resultType, lowerBound, upperBound, step, alignedVars, - /*alignment_values=*/nullptr, ifClauseOperand, nontemporalVars, - orderClauseOperand, simdlenClauseOperand, safelenClauseOperand, - /*inclusive=*/firOpBuilder.getUnitAttr()); - - auto *nestedEval = getCollapsedLoopEval( - eval, Fortran::lower::getCollapseValue(loopOpClauseList)); - - auto ivCallback = [&](mlir::Operation *op) { - return genLoopVars(op, converter, loc, iv); - }; - - createBodyOfOp( - simdLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval) - .setClauses(&loopOpClauseList) - .setDataSharingProcessor(&dsp) - .setGenRegionEntryCb(ivCallback)); -} - -static void createWsLoop(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - llvm::omp::Directive ompDirective, - const Fortran::parser::OmpClauseList &beginClauseList, - const Fortran::parser::OmpClauseList *endClauseList, - mlir::Location loc) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - DataSharingProcessor dsp(converter, beginClauseList, eval); - dsp.processStep1(); - - Fortran::lower::StatementContext stmtCtx; - mlir::Value scheduleChunkClauseOperand; - llvm::SmallVector lowerBound, upperBound, step, reductionVars; - llvm::SmallVector linearVars, linearStepVars; - llvm::SmallVector iv; - llvm::SmallVector reductionDeclSymbols; - llvm::SmallVector reductionSymbols; - mlir::omp::ClauseOrderKindAttr orderClauseOperand; - mlir::omp::ClauseScheduleKindAttr scheduleValClauseOperand; - mlir::UnitAttr nowaitClauseOperand, scheduleSimdClauseOperand; - mlir::IntegerAttr orderedClauseOperand; - mlir::omp::ScheduleModifierAttr scheduleModClauseOperand; - std::size_t loopVarTypeSize; - - ClauseProcessor cp(converter, semaCtx, beginClauseList); - cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv, - loopVarTypeSize); - cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand); - cp.processReduction(loc, reductionVars, reductionDeclSymbols, - &reductionSymbols); - cp.processTODO(loc, ompDirective); - - convertLoopBounds(converter, loc, lowerBound, upperBound, step, - loopVarTypeSize); - - auto wsLoopOp = firOpBuilder.create( - loc, lowerBound, upperBound, step, linearVars, linearStepVars, - reductionVars, - reductionDeclSymbols.empty() - ? nullptr - : mlir::ArrayAttr::get(firOpBuilder.getContext(), - reductionDeclSymbols), - scheduleValClauseOperand, scheduleChunkClauseOperand, - /*schedule_modifiers=*/nullptr, - /*simd_modifier=*/nullptr, nowaitClauseOperand, orderedClauseOperand, - orderClauseOperand, - /*inclusive=*/firOpBuilder.getUnitAttr()); - - // Handle attribute based clauses. - if (cp.processOrdered(orderedClauseOperand)) - wsLoopOp.setOrderedValAttr(orderedClauseOperand); - - if (cp.processSchedule(scheduleValClauseOperand, scheduleModClauseOperand, - scheduleSimdClauseOperand)) { - wsLoopOp.setScheduleValAttr(scheduleValClauseOperand); - wsLoopOp.setScheduleModifierAttr(scheduleModClauseOperand); - wsLoopOp.setSimdModifierAttr(scheduleSimdClauseOperand); - } - // In FORTRAN `nowait` clause occur at the end of `omp do` directive. 
- // i.e - // !$omp do - // <...> - // !$omp end do nowait - if (endClauseList) { - if (ClauseProcessor(converter, semaCtx, *endClauseList) - .processNowait(nowaitClauseOperand)) - wsLoopOp.setNowaitAttr(nowaitClauseOperand); - } - - auto *nestedEval = getCollapsedLoopEval( - eval, Fortran::lower::getCollapseValue(beginClauseList)); - - llvm::SmallVector reductionTypes; - reductionTypes.reserve(reductionVars.size()); - llvm::transform(reductionVars, std::back_inserter(reductionTypes), - [](mlir::Value v) { return v.getType(); }); - - auto ivCallback = [&](mlir::Operation *op) { - return genLoopAndReductionVars(op, converter, loc, iv, reductionSymbols, - reductionTypes); - }; - - createBodyOfOp( - wsLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval) - .setClauses(&beginClauseList) - .setDataSharingProcessor(&dsp) - .setReductions(&reductionSymbols, &reductionTypes) - .setGenRegionEntryCb(ivCallback)); - - // Create trip_count if inside of omp.target and this is host compilation - auto offloadMod = llvm::dyn_cast( - firOpBuilder.getModule().getOperation()); - auto targetOp = wsLoopOp->getParentOfType(); - - if (offloadMod && targetOp && !offloadMod.getIsTargetDevice() && - targetOp.isTargetSPMDLoop()) { - // Lower loop bounds and step, and process collapsing again, putting lowered - // values outside of omp.target this time. This enables calculating and - // accessing the trip count in the host, which is needed when lowering to - // LLVM IR via the OMPIRBuilder. - HostClausesInsertionGuard guard(firOpBuilder); - llvm::SmallVector outsideLB, outsideUB, outsideStep; - llvm::SmallVector outsideIV; - cp.processCollapse(loc, eval, outsideLB, outsideUB, outsideStep, outsideIV, - loopVarTypeSize); - targetOp.getTripCountMutable().assign( - calculateTripCount(converter, loc, outsideLB, outsideUB, outsideStep)); - } -} - -static void createSimdWsLoop( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive ompDirective, - const Fortran::parser::OmpClauseList &beginClauseList, - const Fortran::parser::OmpClauseList *endClauseList, mlir::Location loc) { - ClauseProcessor cp(converter, semaCtx, beginClauseList); - cp.processTODO< - Fortran::parser::OmpClause::Aligned, Fortran::parser::OmpClause::Allocate, - Fortran::parser::OmpClause::Linear, Fortran::parser::OmpClause::Safelen, - Fortran::parser::OmpClause::Simdlen, Fortran::parser::OmpClause::Order>( - loc, ompDirective); - // TODO: Add support for vectorization - add vectorization hints inside loop - // body. - // OpenMP standard does not specify the length of vector instructions. - // Currently we safely assume that for !$omp do simd pragma the SIMD length - // is equal to 1 (i.e. we generate standard workshare loop). - // When support for vectorization is enabled, then we need to add handling of - // if clause. Currently if clause can be skipped because we always assume - // SIMD length = 1. 
- createWsLoop(converter, semaCtx, eval, ompDirective, beginClauseList, - endClauseList, loc); -} - static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, @@ -1973,13 +2283,16 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, const Fortran::parser::OpenMPLoopConstruct &loopConstruct) { const auto &beginLoopDirective = std::get(loopConstruct.t); - const auto &loopOpClauseList = + const auto &beginClauseList = std::get(beginLoopDirective.t); mlir::Location currentLocation = converter.genLocation(beginLoopDirective.source); - const auto ompDirective = + llvm::omp::Directive origDirective = std::get(beginLoopDirective.t).v; + assert(llvm::omp::loopConstructSet.test(origDirective) && + "Expected loop construct"); + const auto *endClauseList = [&]() { using RetTy = const Fortran::parser::OmpClauseList *; if (auto &endLoopDirective = @@ -1991,61 +2304,191 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, return RetTy(); }(); - bool validDirective = false; - if (llvm::omp::topTaskloopSet.test(ompDirective)) { - validDirective = true; - TODO(currentLocation, "Taskloop construct"); - } else { - // Create omp.{target, teams, distribute, parallel} nested operations - if ((llvm::omp::allTargetSet & llvm::omp::loopConstructSet) - .test(ompDirective)) { - validDirective = true; - genTargetOp(converter, semaCtx, eval, /*genNested=*/false, - currentLocation, loopOpClauseList, ompDirective, - /*outerCombined=*/true); + /// Utility to remove the first leaf construct from a combined loop construct. + /// Composite constructs are not handled, as they cannot be split in that way. + auto peelCombinedLoopDirective = + [](llvm::omp::Directive dir) -> llvm::omp::Directive { + using D = llvm::omp::Directive; + switch (dir) { + case D::OMPD_masked_taskloop: + case D::OMPD_master_taskloop: + return D::OMPD_taskloop; + case D::OMPD_masked_taskloop_simd: + case D::OMPD_master_taskloop_simd: + return D::OMPD_taskloop_simd; + case D::OMPD_parallel_do: + return D::OMPD_do; + case D::OMPD_parallel_do_simd: + return D::OMPD_do_simd; + case D::OMPD_parallel_masked_taskloop: + return D::OMPD_masked_taskloop; + case D::OMPD_parallel_master_taskloop: + return D::OMPD_master_taskloop; + case D::OMPD_parallel_masked_taskloop_simd: + return D::OMPD_masked_taskloop_simd; + case D::OMPD_parallel_master_taskloop_simd: + return D::OMPD_master_taskloop_simd; + case D::OMPD_target_parallel_do: + return D::OMPD_parallel_do; + case D::OMPD_target_parallel_do_simd: + return D::OMPD_parallel_do_simd; + case D::OMPD_target_simd: + return D::OMPD_simd; + case D::OMPD_target_teams_distribute: + return D::OMPD_teams_distribute; + case D::OMPD_target_teams_distribute_parallel_do: + return D::OMPD_teams_distribute_parallel_do; + case D::OMPD_target_teams_distribute_parallel_do_simd: + return D::OMPD_teams_distribute_parallel_do_simd; + case D::OMPD_target_teams_distribute_simd: + return D::OMPD_teams_distribute_simd; + case D::OMPD_teams_distribute: + return D::OMPD_distribute; + case D::OMPD_teams_distribute_parallel_do: + return D::OMPD_distribute_parallel_do; + case D::OMPD_teams_distribute_parallel_do_simd: + return D::OMPD_distribute_parallel_do_simd; + case D::OMPD_teams_distribute_simd: + return D::OMPD_distribute_simd; + case D::OMPD_parallel_loop: + case D::OMPD_teams_loop: + return D::OMPD_loop; + case D::OMPD_target_parallel_loop: + return D::OMPD_parallel_loop; + case D::OMPD_target_teams_loop: + return 
D::OMPD_teams_loop; + default: + llvm_unreachable("Unexpected non-combined loop construct"); } - if ((llvm::omp::allTeamsSet & llvm::omp::loopConstructSet) - .test(ompDirective)) { - validDirective = true; - genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, - loopOpClauseList, /*outerCombined=*/true); - } - if (llvm::omp::allDistributeSet.test(ompDirective)) { - validDirective = true; - bool outerCombined = llvm::omp::topDistributeSet.test(ompDirective); - genDistributeOp(converter, semaCtx, eval, /*genNested=*/false, - currentLocation, loopOpClauseList, outerCombined); - } - if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet) - .test(ompDirective)) { - validDirective = true; - genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, - currentLocation, loopOpClauseList, - /*outerCombined=*/true); + }; + + // Privatization and loop nest clause processing must be done before producing + // any wrappers and after combined constructs, so that any operations created + // are outside of the wrapper nest. + DataSharingProcessor dsp(converter, beginClauseList, eval); + LoopNestOpClauseOps clauseOps; + auto processLoopNestClauses = [&]() { + dsp.processStep1(); + genLoopNestClauses(converter, semaCtx, eval, beginClauseList, + currentLocation, clauseOps); + }; + + llvm::omp::Directive ompDirective = origDirective; + if (llvm::omp::topTargetSet.test(ompDirective)) { + // TODO Combined constructs: Call genClauses and pass them in. + genTargetOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, + beginClauseList, /*outerCombined=*/true); + ompDirective = peelCombinedLoopDirective(ompDirective); + } + + if (llvm::omp::topTeamsSet.test(ompDirective)) { + genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, + beginClauseList, /*outerCombined=*/true); + ompDirective = peelCombinedLoopDirective(ompDirective); + } + + if (llvm::omp::topParallelSet.test(ompDirective)) { + genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, + /*isComposite=*/false, currentLocation, beginClauseList, + /*outerCombined=*/true); + ompDirective = peelCombinedLoopDirective(ompDirective); + processLoopNestClauses(); + } else { + processLoopNestClauses(); + + if (llvm::omp::topDistributeSet.test(ompDirective)) { + switch (ompDirective) { + case llvm::omp::Directive::OMPD_distribute: + genDistributeOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList, + /*outerCombined=*/true); + break; + case llvm::omp::Directive::OMPD_distribute_parallel_do: + genCompositeDistributeParallelDo(converter, semaCtx, eval, + beginClauseList, endClauseList, + currentLocation); + break; + case llvm::omp::Directive::OMPD_distribute_parallel_do_simd: + genCompositeDistributeParallelDoSimd(converter, semaCtx, eval, + beginClauseList, endClauseList, + currentLocation); + break; + case llvm::omp::Directive::OMPD_distribute_simd: + genCompositeDistributeSimd(converter, semaCtx, eval, beginClauseList, + endClauseList, currentLocation); + break; + default: + llvm_unreachable("Unexpected DISTRIBUTE construct"); + } + } else if (llvm::omp::topTaskloopSet.test(ompDirective)) { + switch (ompDirective) { + case llvm::omp::Directive::OMPD_taskloop_simd: + genCompositeTaskLoopSimd(converter, semaCtx, eval, beginClauseList, + endClauseList, currentLocation); + break; + case llvm::omp::Directive::OMPD_taskloop: + genTaskLoopOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList); + break; + default: + 
llvm_unreachable("Unexpected TASKLOOP construct"); + } + } else if (ompDirective == llvm::omp::Directive::OMPD_simd) { + genSimdLoopOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList); + } else if (!llvm::omp::topDoSet.test(ompDirective)) { + TODO(currentLocation, + "Unhandled loop directive (" + + llvm::omp::getOpenMPDirectiveName(origDirective) + ")"); } } - if ((llvm::omp::allDoSet | llvm::omp::allSimdSet).test(ompDirective)) - validDirective = true; - if (!validDirective) { + if (llvm::omp::topDoSet.test(ompDirective)) { + switch (ompDirective) { + case llvm::omp::Directive::OMPD_do_simd: + genCompositeDoSimd(converter, semaCtx, eval, beginClauseList, + endClauseList, currentLocation); + break; + case llvm::omp::Directive::OMPD_do: + genWsLoopOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList, endClauseList); + break; + default: + llvm_unreachable("Unexpected DO construct"); + } + } else if (llvm::omp::allParallelSet.test(origDirective)) { TODO(currentLocation, "Unhandled loop directive (" + - llvm::omp::getOpenMPDirectiveName(ompDirective) + + llvm::omp::getOpenMPDirectiveName(origDirective) + ")"); } - if (llvm::omp::allDoSimdSet.test(ompDirective)) { - // 2.9.3.2 Workshare SIMD construct - createSimdWsLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, - endClauseList, currentLocation); + // Create inner loop nest and body. + mlir::omp::LoopNestOp loopNestOp = + genLoopNestOp(converter, semaCtx, eval, currentLocation, beginClauseList, + clauseOps, dsp); - } else if (llvm::omp::allSimdSet.test(ompDirective)) { - // 2.9.3.1 SIMD construct - createSimdLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, - currentLocation); - genOpenMPReduction(converter, semaCtx, loopOpClauseList); - } else { - createWsLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, - endClauseList, currentLocation); + if (ompDirective == llvm::omp::Directive::OMPD_simd) + genOpenMPReduction(converter, semaCtx, beginClauseList); + + // Create trip_count outside of omp.target if this is host compilation and the + // loop is inside of a target region. + auto offloadMod = llvm::dyn_cast( + converter.getModuleOp().getOperation()); + auto targetOp = loopNestOp->getParentOfType(); + + if (offloadMod && targetOp && !offloadMod.getIsTargetDevice() && + targetOp.isTargetSPMDLoop()) { + // Lower loop bounds and step, and process collapsing again, putting lowered + // values outside of omp.target this time. This enables calculating and + // accessing the trip count in the host, which is needed when lowering to + // LLVM IR via the OMPIRBuilder. 
+ HostClausesInsertionGuard guard(converter.getFirOpBuilder()); + CollapseClauseOps collapseOps; + ClauseProcessor(converter, semaCtx, beginClauseList) + .processCollapse(currentLocation, eval, collapseOps); + targetOp.getTripCountMutable().assign( + calculateTripCount(converter, currentLocation, collapseOps.loopLBVar, + collapseOps.loopUBVar, collapseOps.loopStepVar)); } } @@ -2066,6 +2509,9 @@ genOMP(Fortran::lower::AbstractConverter &converter, const auto &endClauseList = std::get(endBlockDirective.t); + assert(llvm::omp::blockConstructSet.test(directive.v) && + "Expected block construct"); + for (const Fortran::parser::OmpClause &clause : beginClauseList.v) { mlir::Location clauseLocation = converter.genLocation(clause.source); if (!std::get_if(&clause.u) && @@ -2106,11 +2552,11 @@ genOMP(Fortran::lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_ordered: genOrderedRegionOp(converter, semaCtx, eval, /*genNested=*/true, - currentLocation); + currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_parallel: genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/true, - currentLocation, beginClauseList); + /*isComposite=*/false, currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_single: genSingleOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, @@ -2118,7 +2564,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_target: genTargetOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, - beginClauseList, directive.v); + beginClauseList); break; case llvm::omp::Directive::OMPD_target_data: genDataOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, @@ -2153,29 +2599,25 @@ genOMP(Fortran::lower::AbstractConverter &converter, // Codegen for combined directives bool combinedDirective = false; - if ((llvm::omp::allTargetSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::allTargetSet.test(directive.v)) { genTargetOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, - beginClauseList, directive.v, - /*outerCombined=*/true); + beginClauseList, /*outerCombined=*/true); combinedDirective = true; } - if ((llvm::omp::allTeamsSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::allTeamsSet.test(directive.v)) { genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, beginClauseList); combinedDirective = true; } - if ((llvm::omp::allParallelSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::allParallelSet.test(directive.v)) { bool outerCombined = directive.v != llvm::omp::Directive::OMPD_target_parallel; genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, - currentLocation, beginClauseList, outerCombined); + /*isComposite=*/false, currentLocation, beginClauseList, + outerCombined); combinedDirective = true; } - if ((llvm::omp::workShareSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::workShareSet.test(directive.v)) { genSingleOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, beginClauseList, endClauseList); combinedDirective = true; @@ -2194,38 +2636,13 @@ genOMP(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPCriticalConstruct &criticalConstruct) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::Location currentLocation = 
converter.getCurrentLocation(); - mlir::IntegerAttr hintClauseOp; - std::string name; - const Fortran::parser::OmpCriticalDirective &cd = + const auto &cd = std::get(criticalConstruct.t); - if (std::get>(cd.t).has_value()) { - name = - std::get>(cd.t).value().ToString(); - } - const auto &clauseList = std::get(cd.t); - ClauseProcessor(converter, semaCtx, clauseList).processHint(hintClauseOp); - - mlir::omp::CriticalOp criticalOp = [&]() { - if (name.empty()) { - return firOpBuilder.create( - currentLocation, mlir::FlatSymbolRefAttr()); - } - mlir::ModuleOp module = firOpBuilder.getModule(); - mlir::OpBuilder modBuilder(module.getBodyRegion()); - auto global = module.lookupSymbol(name); - if (!global) - global = modBuilder.create( - currentLocation, - mlir::StringAttr::get(firOpBuilder.getContext(), name), hintClauseOp); - return firOpBuilder.create( - currentLocation, mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(), - global.getSymName())); - }(); - auto genInfo = OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval); - createBodyOfOp(criticalOp, genInfo); + const auto &name = std::get>(cd.t); + mlir::Location currentLocation = converter.getCurrentLocation(); + genCriticalOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, + clauseList, name); } static void @@ -2234,55 +2651,48 @@ genOMP(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPSectionsConstruct §ionsConstruct) { - mlir::Location currentLocation = converter.getCurrentLocation(); - llvm::SmallVector allocateOperands, allocatorOperands; - mlir::UnitAttr nowaitClauseOperand; const auto &beginSectionsDirective = std::get(sectionsConstruct.t); - const auto §ionsClauseList = + const auto &beginClauseList = std::get(beginSectionsDirective.t); - - // Process clauses before optional omp.parallel, so that new variables are - // allocated outside of the parallel region - ClauseProcessor cp(converter, semaCtx, sectionsClauseList); - cp.processSectionsReduction(currentLocation); - cp.processAllocate(allocatorOperands, allocateOperands); - llvm::omp::Directive dir = std::get(beginSectionsDirective.t) .v; + const auto §ionBlocks = + std::get(sectionsConstruct.t); + + // Process clauses before optional omp.parallel, so that new variables are + // allocated outside of the parallel region. + mlir::Location currentLocation = converter.getCurrentLocation(); + SectionsOpClauseOps clauseOps; + genSectionsClauses(converter, semaCtx, beginClauseList, currentLocation, + /*clausesFromBeginSections=*/true, clauseOps); - // Parallel wrapper of PARALLEL SECTIONS construct + // Parallel wrapper of PARALLEL SECTIONS construct. 
if (dir == llvm::omp::Directive::OMPD_parallel_sections) { genParallelOp(converter, symTable, semaCtx, eval, - /*genNested=*/false, currentLocation, sectionsClauseList, - /*outerCombined=*/true); + /*genNested=*/false, /*isComposite=*/false, currentLocation, + beginClauseList, /*outerCombined=*/true); } else { const auto &endSectionsDirective = std::get(sectionsConstruct.t); - const auto &endSectionsClauseList = + const auto &endClauseList = std::get(endSectionsDirective.t); - ClauseProcessor(converter, semaCtx, endSectionsClauseList) - .processNowait(nowaitClauseOperand); + genSectionsClauses(converter, semaCtx, endClauseList, currentLocation, + /*clausesFromBeginSections=*/false, clauseOps); } - // SECTIONS construct - genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(false), - /*reduction_vars=*/mlir::ValueRange(), - /*reductions=*/nullptr, allocateOperands, allocatorOperands, - nowaitClauseOperand); + // SECTIONS construct. + genSectionsOp(converter, semaCtx, eval, currentLocation, clauseOps); - const auto §ionBlocks = - std::get(sectionsConstruct.t); + // Generate nested SECTION operations recursively. auto &firOpBuilder = converter.getFirOpBuilder(); auto ip = firOpBuilder.saveInsertionPoint(); for (const auto &[nblock, neval] : llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) { symTable.pushScope(); genSectionOp(converter, semaCtx, neval, /*genNested=*/true, currentLocation, - sectionsClauseList); + beginClauseList); symTable.popScope(); firOpBuilder.restoreInsertionPoint(ip); } @@ -2366,12 +2776,12 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) { - llvm::SmallVector symbolAndClause; mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + DeclareTargetOpClauseOps clauseOps; mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo( - converter, semaCtx, eval, declareTargetConstruct, symbolAndClause); + converter, semaCtx, eval, declareTargetConstruct, clauseOps); - for (const DeclareTargetCapturePair &symClause : symbolAndClause) { + for (const DeclareTargetCapturePair &symClause : clauseOps.symbolAndClause) { mlir::Operation *op = mod.lookupSymbol(converter.mangleName( std::get(symClause))); @@ -2482,11 +2892,10 @@ genOMP(Fortran::lower::AbstractConverter &converter, mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder, mlir::Operation *op, mlir::Location loc) { - if (mlir::isa(op)) + if (mlir::isa(op)) return builder.create(loc); - else - return builder.create(loc); + return builder.create(loc); } void Fortran::lower::genOpenMPConstruct( diff --git a/flang/lib/Lower/OpenMP/OperationClauses.h b/flang/lib/Lower/OpenMP/OperationClauses.h new file mode 100644 index 0000000000000..ed49c3f0750b1 --- /dev/null +++ b/flang/lib/Lower/OpenMP/OperationClauses.h @@ -0,0 +1,306 @@ +//===-- Lower/OpenMP/OperationClauses.h -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ +// +//===----------------------------------------------------------------------===// + +#include "Utils.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include + +namespace Fortran { +namespace semantics { +class Symbol; +} // namespace semantics +} // namespace Fortran + +namespace Fortran { +namespace lower { +namespace omp { + +//===----------------------------------------------------------------------===// +// Mixin structures defining operands associated with each OpenMP clause. +//===----------------------------------------------------------------------===// + +struct AlignedClauseOps { + llvm::SmallVector alignedVars; + llvm::SmallVector alignmentAttrs; +}; + +struct AllocateClauseOps { + llvm::SmallVector allocatorVars, allocateVars; +}; + +struct CollapseClauseOps { + llvm::SmallVector loopLBVar, loopUBVar, loopStepVar; + llvm::SmallVector loopIV; +}; + +struct CopyinClauseOps {}; + +struct CopyprivateClauseOps { + llvm::SmallVector copyprivateVars; + llvm::SmallVector copyprivateFuncs; +}; + +struct DependClauseOps { + llvm::SmallVector dependTypeAttrs; + llvm::SmallVector dependVars; +}; + +struct DeviceClauseOps { + mlir::Value deviceVar; +}; + +struct DeviceTypeClauseOps { + mlir::omp::DeclareTargetDeviceType deviceType; +}; + +struct DistScheduleClauseOps { + mlir::UnitAttr distScheduleStaticAttr; + mlir::Value distScheduleChunkSizeVar; +}; + +struct EnterLinkToClauseOps { + llvm::SmallVector symbolAndClause; +}; + +struct FinalClauseOps { + mlir::Value finalVar; +}; + +struct GrainsizeClauseOps { + mlir::Value grainsizeVar; +}; + +struct HintClauseOps { + mlir::IntegerAttr hintAttr; +}; + +struct IfClauseOps { + mlir::Value ifVar; +}; + +struct InReductionClauseOps { + llvm::SmallVector inReductionVars; + llvm::SmallVector inReductionTypes; + llvm::SmallVector inReductionDeclSymbols; + std::optional> + inReductionSymbols; +}; + +struct LinearClauseOps { + llvm::SmallVector linearVars, linearStepVars; +}; + +// The optional parameters - mapSymTypes, mapSymLocs & mapSymbols are used to +// store the original type, location and Fortran symbol for the map operands. +// They may be used later on to create the block_arguments for some of the +// target directives that require it. 
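The MapClauseOps struct that the comment above describes follows immediately after this aside. As a rough illustration of how those optional members would be engaged, with the helper name and the element types (mlir::Type, mlir::Location, Fortran::semantics::Symbol pointers) being assumptions drawn from the comment rather than from the patch itself:

// Hypothetical helper, for illustration only.
static void addMapOperand(MapClauseOps &result, mlir::Value mapOp,
                          mlir::Type varType, mlir::Location varLoc,
                          const Fortran::semantics::Symbol &sym) {
  // Every map clause contributes a map operand to the op being built.
  result.mapVars.push_back(mapOp);
  // The bookkeeping vectors are optional; callers that later need block
  // arguments (e.g. for TARGET) engage them before clause processing.
  if (result.mapSymTypes)
    result.mapSymTypes->push_back(varType);
  if (result.mapSymLocs)
    result.mapSymLocs->push_back(varLoc);
  if (result.mapSymbols)
    result.mapSymbols->push_back(&sym);
}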
+struct MapClauseOps { + llvm::SmallVector mapVars; + std::optional> mapSymTypes; + std::optional> mapSymLocs; + std::optional> + mapSymbols; +}; + +struct MergeableClauseOps { + mlir::UnitAttr mergeableAttr; +}; + +struct NogroupClauseOps { + mlir::UnitAttr nogroupAttr; +}; + +struct NontemporalClauseOps { + llvm::SmallVector nontemporalVars; +}; + +struct NowaitClauseOps { + mlir::UnitAttr nowaitAttr; +}; + +struct NumTasksClauseOps { + mlir::Value numTasksVar; +}; + +struct NumTeamsClauseOps { + mlir::Value numTeamsLowerVar; + mlir::Value numTeamsUpperVar; +}; + +struct NumThreadsClauseOps { + mlir::Value numThreadsVar; +}; + +struct OrderClauseOps { + mlir::omp::ClauseOrderKindAttr orderAttr; +}; + +struct OrderedClauseOps { + mlir::IntegerAttr orderedAttr; +}; + +struct ParallelizationLevelClauseOps { + mlir::UnitAttr parLevelThreadsAttr; + mlir::UnitAttr parLevelSimdAttr; +}; + +struct PriorityClauseOps { + mlir::Value priorityVar; +}; + +struct PrivateClauseOps { + llvm::SmallVector privateVars; + llvm::SmallVector privatizers; +}; + +struct ProcBindClauseOps { + mlir::omp::ClauseProcBindKindAttr procBindKindAttr; +}; + +struct ReductionClauseOps { + llvm::SmallVector reductionVars; + llvm::SmallVector reductionTypes; + llvm::SmallVector reductionDeclSymbols; + std::optional> + reductionSymbols; +}; + +struct SafelenClauseOps { + mlir::IntegerAttr safelenAttr; +}; + +struct ScheduleClauseOps { + mlir::omp::ClauseScheduleKindAttr scheduleValAttr; + mlir::omp::ScheduleModifierAttr scheduleModAttr; + mlir::Value scheduleChunkVar; + mlir::UnitAttr scheduleSimdAttr; +}; + +struct SimdlenClauseOps { + mlir::IntegerAttr simdlenAttr; +}; + +struct TargetReductionClauseOps { + llvm::SmallVector targetReductionSymbols; +}; + +struct TaskReductionClauseOps { + llvm::SmallVector taskReductionVars; + llvm::SmallVector taskReductionTypes; + llvm::SmallVector taskReductionDeclSymbols; + std::optional> + taskReductionSymbols; +}; + +struct ThreadLimitClauseOps { + mlir::Value threadLimitVar; +}; + +struct UntiedClauseOps { + mlir::UnitAttr untiedAttr; +}; + +struct UseDeviceClauseOps { + llvm::SmallVector useDevicePtrVars; + llvm::SmallVector useDeviceAddrVars; + llvm::SmallVector useDeviceTypes; + llvm::SmallVector useDeviceLocs; + llvm::SmallVector useDeviceSymbols; +}; + +//===----------------------------------------------------------------------===// +// Structures defining clause operands associated with each OpenMP leaf +// construct. +// +// These mirror the arguments expected by the corresponding OpenMP MLIR ops. +//===----------------------------------------------------------------------===// + +namespace detail { +template +struct Clauses : public Mixins... {}; +} // namespace detail + +using CriticalDeclareOpClauseOps = detail::Clauses; + +using DataOpClauseOps = detail::Clauses; + +using DeclareTargetOpClauseOps = detail::Clauses; + +using DistributeOpClauseOps = + detail::Clauses; + +using EnterExitUpdateDataOpClauseOps = + detail::Clauses; + +using LoopNestOpClauseOps = detail::Clauses; + +// TODO Rename to "masked" +// TODO `filter` clause. +using MasterOpClauseOps = detail::Clauses<>; + +using OrderedRegionOpClauseOps = detail::Clauses; + +using ParallelOpClauseOps = + detail::Clauses; + +using SectionsOpClauseOps = + detail::Clauses; + +// TODO `linear` clause. +using SimdLoopOpClauseOps = + detail::Clauses; + +using SingleOpClauseOps = + detail::Clauses; + +// TODO `allocate`, `defaultmap`, `has_device_addr`, `in_reduction`, +// `is_device_ptr`, `uses_allocators` clauses. 
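Before the remaining per-construct aliases, here is a stand-alone sketch of the mixin pattern that the detail::Clauses template above relies on, using simplified stand-ins rather than the real clause structs:

#include <cassert>
#include <vector>

// Simplified stand-ins for the real *ClauseOps mixins.
struct DemoAllocateClauseOps { std::vector<int> allocateVars; };
struct DemoNowaitClauseOps { bool nowaitAttr = false; };

namespace demo {
template <typename... Mixins>
struct Clauses : public Mixins... {};
} // namespace demo

// A per-construct operand bundle is just a named combination of mixins: a
// clause processor fills it member by member, and the op builder reads
// exactly the fields that construct supports.
using DemoSingleOpClauseOps =
    demo::Clauses<DemoAllocateClauseOps, DemoNowaitClauseOps>;

int main() {
  DemoSingleOpClauseOps ops;
  ops.allocateVars.push_back(1); // inherited from DemoAllocateClauseOps
  ops.nowaitAttr = true;         // inherited from DemoNowaitClauseOps
  assert(ops.allocateVars.size() == 1 && ops.nowaitAttr);
  return 0;
}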
+using TargetOpClauseOps = + detail::Clauses; + +using TaskGroupOpClauseOps = + detail::Clauses; + +using TaskLoopOpClauseOps = + detail::Clauses; + +// TODO `affinity`, `detach` clauses. +using TaskOpClauseOps = + detail::Clauses; + +// TODO `depend`, `nowait` clauses. +using TaskWaitOpClauseOps = detail::Clauses<>; + +using TeamsOpClauseOps = + detail::Clauses; + +// TODO `allocate` clause. +using WsloopOpClauseOps = + detail::Clauses; + +} // namespace omp +} // namespace lower +} // namespace Fortran diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index 3b72148867874..2946d3ef2e6e1 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -60,7 +60,7 @@ ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType( void ReductionProcessor::addReductionSym( const Fortran::parser::OmpReductionClause &reduction, - llvm::SmallVector &symbols) { + llvm::SmallVectorImpl &symbols) { const auto &objectList{std::get(reduction.t)}; for (const Fortran::parser::OmpObject &ompObject : objectList.v) { diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/lib/Lower/OpenMP/ReductionProcessor.h index 85c286ead5282..abbd737084dc5 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.h +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.h @@ -101,7 +101,7 @@ class ReductionProcessor { static void addReductionSym( const Fortran::parser::OmpReductionClause &reduction, - llvm::SmallVector &symbols); + llvm::SmallVectorImpl &symbols); /// Creates an OpenMP reduction declaration and inserts it into the provided /// symbol table. The declaration has a constant initializer with the neutral diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index d1bf73ba1dfd2..2858412d77561 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -67,6 +68,24 @@ void genObjectList(const Fortran::parser::OmpObjectList &objectList, } } +mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, + std::size_t loopVarTypeSize) { + // OpenMP runtime requires 32-bit or 64-bit loop variables. 
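A quick worked example of the clamping that follows (illustrative values, not taken from the patch): a Fortran INTEGER(KIND=2) loop variable has loopVarTypeSize = 2 bytes, i.e. 16 bits, and is widened to i32; an INTEGER(KIND=16) variable has 128 bits and is narrowed to i64, which also emits the warning below.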
+ loopVarTypeSize = loopVarTypeSize * 8; + if (loopVarTypeSize < 32) { + loopVarTypeSize = 32; + } else if (loopVarTypeSize > 64) { + loopVarTypeSize = 64; + mlir::emitWarning(converter.getCurrentLocation(), + "OpenMP loop iteration variable cannot have more than 64 " + "bits size and will be narrowed into 64 bits."); + } + assert((loopVarTypeSize == 32 || loopVarTypeSize == 64) && + "OpenMP loop iteration variable size must be transformed into 32-bit " + "or 64-bit"); + return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); +} + void gatherFuncAndVarSyms( const Fortran::parser::OmpObjectList &objList, mlir::omp::DeclareTargetCaptureClause clause, diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 00b0165cec554..369654e546290 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -68,6 +68,9 @@ void genObjectList(const Fortran::parser::OmpObjectList &objectList, Fortran::lower::AbstractConverter &converter, llvm::SmallVectorImpl &operands); +mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, + std::size_t loopVarTypeSize); + mlir::omp::TargetOp findParentTargetOp(mlir::OpBuilder &builder); } // namespace omp diff --git a/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp b/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp index f5a3d925ab5d9..e242dc5df6916 100644 --- a/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp @@ -105,9 +105,12 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { step.push_back(rewriter.clone(*stepOp)->getResult(0)); // ==== TODO (1) End ==== - auto wsLoopOp = rewriter.create( - doLoop.getLoc(), lowerBound, upperBound, step); - wsLoopOp.setInclusive(true); + auto wsLoopOp = rewriter.create(doLoop.getLoc()); + rewriter.createBlock(&wsLoopOp.getRegion()); + + // TODO Test that this didn't break something. + auto loopNestOp = rewriter.create( + doLoop.getLoc(), lowerBound, upperBound, step, /*inclusive=*/true); auto outlineableOp = mlir::dyn_cast(*parallelOp); @@ -180,11 +183,11 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { // Clone the loop's body inside the worksharing construct using the mapped // memref values. - rewriter.cloneRegionBefore(doLoop.getRegion(), wsLoopOp.getRegion(), - wsLoopOp.getRegion().begin(), mapper); + rewriter.cloneRegionBefore(doLoop.getRegion(), loopNestOp.getRegion(), + loopNestOp.getRegion().begin(), mapper); - mlir::Operation *terminator = wsLoopOp.getRegion().back().getTerminator(); - rewriter.setInsertionPointToEnd(&wsLoopOp.getRegion().back()); + mlir::Operation *terminator = loopNestOp.getRegion().back().getTerminator(); + rewriter.setInsertionPointToEnd(&loopNestOp.getRegion().back()); rewriter.create(terminator->getLoc()); rewriter.eraseOp(terminator); diff --git a/flang/test/Lower/OpenMP/FIR/copyin.f90 b/flang/test/Lower/OpenMP/FIR/copyin.f90 index 20023a81977ae..7161722b21090 100644 --- a/flang/test/Lower/OpenMP/FIR/copyin.f90 +++ b/flang/test/Lower/OpenMP/FIR/copyin.f90 @@ -145,7 +145,8 @@ subroutine copyin_derived_type() ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_4]] : !fir.ref ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! 
CHECK: fir.store %[[VAL_9]] to %[[VAL_3]] : !fir.ref ! CHECK: fir.call @_QPsub4(%[[VAL_4]]) {{.*}}: (!fir.ref) -> () ! CHECK: omp.yield @@ -286,7 +287,8 @@ subroutine common_1() !CHECK: %[[val_c1_i32:.*]] = arith.constant 1 : i32 !CHECK: %[[val_19:.*]] = fir.load %[[val_13]] : !fir.ref !CHECK: %[[val_c1_i32_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_19]]) inclusive step (%[[val_c1_i32_2]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_19]]) inclusive step (%[[val_c1_i32_2]]) { !CHECK: fir.store %[[arg]] to %[[val_9]] : !fir.ref !CHECK: %[[val_20:.*]] = fir.load %[[val_16]] : !fir.ref !CHECK: %[[val_21:.*]] = fir.load %[[val_9]] : !fir.ref @@ -303,7 +305,7 @@ subroutine common_2() integer :: y common /d/ x, y !$omp threadprivate(/d/) - + !$omp parallel do copyin(/d/) do i = 1, x y = y + i diff --git a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 index 389bcba35f77f..f779fc9a775cb 100644 --- a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 +++ b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 @@ -1,4 +1,4 @@ -! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s !CHECK: func.func @_QPlastprivate_common() { !CHECK: %[[val_0:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} @@ -17,7 +17,8 @@ !CHECK: %[[val_c1_i32:.*]] = arith.constant 1 : i32 !CHECK: %[[val_c100_i32:.*]] = arith.constant 100 : i32 !CHECK: %[[val_c1_i32_0:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_c100_i32]]) inclusive step (%[[val_c1_i32_0]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_c100_i32]]) inclusive step (%[[val_c1_i32_0]]) { !CHECK: fir.store %[[arg]] to %[[val_0]] : !fir.ref !CHECK: %[[val_11:.*]] = arith.addi %[[arg]], %[[val_c1_i32_0]] : i32 !CHECK: %[[val_c0_i32:.*]] = arith.constant 0 : i32 diff --git a/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 index 2060e2062c1a3..68867a0bd4149 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 @@ -9,16 +9,17 @@ !CHECK-DAG: %[[ARG1_REF:.*]] = fir.convert %[[ARG1_UNBOX]]#0 : (!fir.ref>) -> !fir.ref> !CHECK: omp.parallel { -!CHECK-DAG: %[[ARG1_PVT:.*]] = fir.alloca !fir.char<1,5> {bindc_name = "arg1", +!CHECK-DAG: %[[ARG1_PVT:.*]] = fir.alloca !fir.char<1,5> {bindc_name = "arg1", ! 
Check that we are accessing the clone inside the loop -!CHECK-DAG: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK-DAG: omp.wsloop { +!CHECK-DAG: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !CHECK-DAG: %[[UNIT:.*]] = arith.constant 6 : i32 !CHECK-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQclX -!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] +!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] !CHECK-NEXT: %[[CNST:.*]] = arith.constant !CHECK-NEXT: %[[CALL_BEGIN_IO:.*]] = fir.call @_FortranAioBeginExternalListOutput(%[[UNIT]], %[[CVT0]], %[[CNST]]) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref -!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT]] +!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT]] !CHECK-NEXT: %[[CVT_0_2:.*]] = fir.convert %[[FIVE]] !CHECK-NEXT: %[[CALL_OP_ASCII:.*]] = fir.call @_FortranAioOutputAscii(%[[CALL_BEGIN_IO]], %[[CVT_0_1]], %[[CVT_0_2]]) !CHECK-NEXT: %[[CALL_END_IO:.*]] = fir.call @_FortranAioEndIoStatement(%[[CALL_BEGIN_IO]]) @@ -37,12 +38,12 @@ !CHECK-DAG: %[[CVT:.*]] = fir.convert %[[ARG1_REF]] : (!fir.ref>) -> !fir.ref !CHECK-DAG: %[[CVT1:.*]] = fir.convert %[[ARG1_PVT]] : (!fir.ref>) -> !fir.ref !CHECK-DAG: fir.call @llvm.memmove.p0.p0.i64(%[[CVT]], %[[CVT1]]{{.*}}) -!CHECK-DAG: } +!CHECK-DAG: } !CHECK-DAG: omp.yield subroutine lastprivate_character(arg1) character(5) :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1(n:n) = 'c' @@ -55,7 +56,8 @@ subroutine lastprivate_character(arg1) !CHECK: func @_QPlastprivate_int(%[[ARG1:.*]]: !fir.ref {fir.bindc_name = "arg1"}) { !CHECK-DAG: omp.parallel { !CHECK-DAG: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "arg1" -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -75,7 +77,7 @@ subroutine lastprivate_character(arg1) subroutine lastprivate_int(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 @@ -90,7 +92,8 @@ subroutine lastprivate_int(arg1) !CHECK: omp.parallel { !CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1" !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -111,7 +114,7 @@ subroutine lastprivate_int(arg1) subroutine mult_lastprivate_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -127,7 +130,8 @@ subroutine mult_lastprivate_int(arg1, arg2) !CHECK: omp.parallel { !CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1" !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -148,7 +152,7 @@ subroutine mult_lastprivate_int(arg1, arg2) subroutine mult_lastprivate_int2(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1, arg2) do n = 1, 5 arg1 = 2 @@ -169,7 +173,8 @@ subroutine mult_lastprivate_int2(arg1, arg2) ! 
Lastprivate Allocation !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK-NOT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -188,7 +193,7 @@ subroutine mult_lastprivate_int2(arg1, arg2) subroutine firstpriv_lastpriv_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -207,7 +212,8 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) !CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1]] : !fir.ref !CHECK-NEXT: fir.store %[[FPV_LD]] to %[[CLONE1]] : !fir.ref !CHECK-NEXT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 @@ -225,7 +231,7 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) subroutine firstpriv_lastpriv_int2(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 index c99bf761333b8..ce46925327ba1 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 @@ -13,7 +13,8 @@ ! CHECK: %[[ONE:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_4:.*]] : !fir.ref ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[PRIV_I]] : !fir.ref ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 index 8f5d280943cc2..204909e30cb96 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 @@ -259,7 +259,8 @@ subroutine simple_loop_1 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref @@ -295,7 +296,8 @@ subroutine simple_loop_2 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref @@ -330,7 +332,8 @@ subroutine simple_loop_3 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! 
FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 index 6eb39a2f63725..b26e618693316 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 @@ -3,7 +3,7 @@ ! RUN: bbc -fopenmp -emit-fir -hlfir=false %s -o - | FileCheck %s -! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) +! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) subroutine omp_do_firstprivate(a) integer::a integer::n @@ -17,7 +17,8 @@ subroutine omp_do_firstprivate(a) ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK-NEXT: omp.wsloop for (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: omp.wsloop { + ! CHECK-NEXT: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK-NEXT: fir.store %[[ARG1]] to %[[REF]] : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK-NEXT: omp.yield @@ -29,7 +30,7 @@ subroutine omp_do_firstprivate(a) call bar(a) end subroutine omp_do_firstprivate -! CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) +! CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) subroutine omp_do_firstprivate2(a, n) integer::a integer::n @@ -48,7 +49,8 @@ subroutine omp_do_firstprivate2(a, n) ! CHECK: %[[LB:.*]] = fir.load %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE1]] : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK-NEXT: omp.wsloop for (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: omp.wsloop { + ! CHECK-NEXT: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK-NEXT: fir.store %[[ARG2]] to %[[REF]] : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK-NEXT: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 index 8649cf284ffd9..b1003253822da 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 @@ -9,7 +9,8 @@ subroutine simple_parallel_do ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -35,7 +36,8 @@ subroutine parallel_do_with_parallel_clauses(cond, nt) ! 
CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -58,7 +60,8 @@ subroutine parallel_do_with_clauses(nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop schedule(dynamic) { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -90,7 +93,8 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -140,7 +144,8 @@ end subroutine parallel_private_do ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV]] : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -182,7 +187,8 @@ end subroutine omp_parallel_multiple_firstprivate_do ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield @@ -224,7 +230,8 @@ end subroutine parallel_do_private ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV_ADDR]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! 
CHECK: omp.yield @@ -266,7 +273,8 @@ end subroutine omp_parallel_do_multiple_firstprivate ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 index d6c10bdee88d5..2c6f460ceacac 100644 --- a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 +++ b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 @@ -77,7 +77,8 @@ subroutine test_stop_in_region3() ! CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[VAL_0]] : !fir.ref ! CHECK: cf.br ^bb1 ! CHECK: ^bb1: diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90 index 3962603572ba0..9e113e2e59d8c 100644 --- a/flang/test/Lower/OpenMP/FIR/target.f90 +++ b/flang/test/Lower/OpenMP/FIR/target.f90 @@ -487,7 +487,8 @@ subroutine omp_target_parallel_do !CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 !CHECK: %[[VAL_6:.*]] = arith.constant 1024 : i32 !CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 - !CHECK: omp.wsloop for (%[[VAL_8:.*]]) : i32 = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_7]]) { + !CHECK: omp.wsloop { + !CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_7]]) { !CHECK: fir.store %[[VAL_8]] to %[[VAL_4]] : !fir.ref !CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 !CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_4]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/unstructured.f90 b/flang/test/Lower/OpenMP/FIR/unstructured.f90 index bfaf38b7ef1af..390b3a8746e8b 100644 --- a/flang/test/Lower/OpenMP/FIR/unstructured.f90 +++ b/flang/test/Lower/OpenMP/FIR/unstructured.f90 @@ -67,14 +67,16 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct ! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 ! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 ! CHECK: ^bb2: // pred: ^bb1 -! CHECK: omp.wsloop for (%[[ARG1:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG1]] to %[[ALLOCA_2]] : !fir.ref ! CHECK: @_FortranAioBeginExternalListOutput ! CHECK: %[[LOAD_1:.*]] = fir.load %[[ALLOCA_2]] : !fir.ref ! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]]) ! CHECK: omp.yield ! CHECK: } -! CHECK: omp.wsloop for (%[[ARG2:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG2]] to %[[ALLOCA_1]] : !fir.ref ! CHECK: br ^bb1 ! CHECK: ^bb2: // 2 preds: ^bb1, ^bb5 @@ -117,7 +119,8 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs ! CHECK-LABEL: func @_QPss4{{.*}} { ! CHECK: omp.parallel { ! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned} -! 
CHECK: omp.wsloop for (%[[ARG:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG]] to %[[ALLOCA]] : !fir.ref ! CHECK: %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}} ! CHECK: %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}} diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 index 4030f46299d0b..4b6498adb31de 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 @@ -19,7 +19,8 @@ program wsloop ! CHECK: %[[VAL_3:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait for (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV:.*]] : !fir.ref ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]] : !fir.ref ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 @@ -37,7 +38,8 @@ program wsloop ! CHECK: %[[VAL_15:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_17:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait for (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref ! CHECK: %[[VAL_24:.*]] = arith.constant 2 : i32 ! CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref @@ -45,7 +47,7 @@ program wsloop ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: omp.yield ! CHECK: } - + end do !$OMP END DO NOWAIT chunk = 6 @@ -61,7 +63,8 @@ program wsloop ! CHECK: %[[VAL_30:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_31:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_0]] : !fir.ref -! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait for (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { ! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref ! CHECK: %[[VAL_39:.*]] = arith.constant 3 : i32 ! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 index 933fc0910e338..aa66b49edf9ec 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 @@ -39,7 +39,8 @@ program wsloop_collapse do i = 1, a do j= 1, b do k = 1, c -! CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) { +! CHECK: omp.wsloop { +! 
CHECK: omp.loopnest (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) { ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref ! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 index 1c381475f6cbb..70f0e7d00b4f9 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 @@ -15,7 +15,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 index 3f425200b8fa4..9170a75b8248e 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 @@ -16,7 +16,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 index 7548d7a597228..9b3daba2f170b 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 @@ -6,7 +6,8 @@ subroutine wsloop_ordered_no_para() integer :: a(10), i -! CHECK: omp.wsloop ordered(0) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(0) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } @@ -25,7 +26,8 @@ subroutine wsloop_ordered_with_para() integer :: a(10), i ! CHECK: func @_QPwsloop_ordered_with_para() { -! CHECK: omp.wsloop ordered(1) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(1) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 index 5664529416fe8..a620cd4852965 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 @@ -61,7 +61,8 @@ ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 -! 
CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { ! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref @@ -97,7 +98,8 @@ subroutine simple_int_reduction ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { ! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref @@ -133,7 +135,8 @@ subroutine simple_real_reduction ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { ! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_7]] : !fir.ref @@ -210,7 +213,8 @@ subroutine simple_real_reduction_switch_order ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) for (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref @@ -263,7 +267,8 @@ subroutine multiple_int_reductions_same_type ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! 
CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) for (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref @@ -322,7 +327,8 @@ subroutine multiple_real_reductions_same_type ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_2]] -> %[[VAL_13:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_3]] -> %[[VAL_14:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_4]] -> %[[VAL_15:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_1]] -> %[[VAL_16:.*]] : !fir.ref) for (%[[VAL_17:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_2]] -> %[[VAL_13:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_3]] -> %[[VAL_14:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_4]] -> %[[VAL_15:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_1]] -> %[[VAL_16:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_17:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { ! CHECK: fir.store %[[VAL_17]] to %[[VAL_9]] : !fir.ref ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_13]] : !fir.ref ! 
CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_9]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 index 9ce1725dbab04..c5401204c0ca6 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 @@ -13,7 +13,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iandEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[IAND_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IAND_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 index f6027416246af..7775bb4c2dc97 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 @@ -13,7 +13,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_ieorEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 index bc143611abe8d..cef02492931c1 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 @@ -10,10 +10,11 @@ !CHECK: omp.yield(%[[IOR_VAL_I]] : i32) !CHECK-LABEL: @_QPreduction_ior -!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> +!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iorEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[IOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 index d5aacd74d8b10..7592da1a8844d 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 @@ -30,7 +30,8 @@ ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! 
CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref> ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref @@ -72,7 +73,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64 @@ -122,7 +124,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) for (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref> ! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 index 9f44e0e26d407..b8cdf4bf46c7f 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 @@ -31,7 +31,8 @@ ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref> ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref @@ -73,7 +74,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! 
CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref
 ! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64
@@ -123,7 +125,8 @@ subroutine simple_reduction_switch_order(y)
 ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32
 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) for (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
+! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) {
+! CHECK: omp.loopnest (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) {
 ! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref
 ! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref>
 ! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90
index af79658491b56..85702c5dc020d 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90
@@ -21,7 +21,8 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box>
 !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"}
 !CHECK: omp.parallel
-!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for
+!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) {
+!CHECK: omp.loopnest
 !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref
 !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
 !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref
@@ -34,7 +35,8 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box>
 !CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_max_realEx"}
 !CHECK: omp.parallel
-!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for
+!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) {
+!CHECK: omp.loopnest
 !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref
 !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
 !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90
index 1095718b4b13f..b82d943009e90 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90
@@ -21,7 +21,8 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box>
 !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_min_intEx"}
 !CHECK: omp.parallel
-!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for
+!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) {
+!CHECK: omp.loopnest
 !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref
 !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
 !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref
@@ -35,7 +36,8 @@
 !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box>
 !CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_min_realEx"}
 !CHECK: omp.parallel
-!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for
+!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) {
+!CHECK: omp.loopnest
 !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref
 !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]]
 !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90
index 2e3f8ca3c207d..446ad4279a682 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90
@@ -14,7 +14,8 @@ program wsloop_dynamic
 !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
-!CHECK: omp.wsloop schedule(runtime, simd) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+!CHECK: omp.wsloop schedule(runtime, simd) nowait {
+!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
 !CHECK: fir.store %[[I]] to %[[STORE:.*]] : !fir.ref
 do i=1, 9
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90
index 4f34f30f3e7c9..f00b7ec0979cb 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90
@@ -22,7 +22,8 @@ program wsloop_variable
 !CHECK: %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64
 !CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64
 !CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64
-!CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
+!CHECK: omp.wsloop {
+!CHECK: omp.loopnest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
 !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16
 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]] : !fir.ref
 !CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref
@@ -46,7 +47,8 @@ program wsloop_variable
 !CHECK: %[[TMP12:.*]] = arith.constant 1 : i32
 !CHECK: %[[TMP13:.*]] = fir.convert %{{.*}} : (i8) -> i32
 !CHECK: %[[TMP14:.*]] = fir.convert %{{.*}} : (i64) -> i32
-!CHECK: omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) {
+!CHECK: omp.wsloop {
+!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) {
 !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16
 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE3:.*]] : !fir.ref
 !CHECK: %[[LOAD3:.*]] = fir.load %[[STORE3]] : !fir.ref
@@ -65,7 +67,8 @@ program wsloop_variable
 !CHECK: %[[TMP17:.*]] = fir.convert %{{.*}} : (i8) -> i64
 !CHECK: %[[TMP18:.*]] = fir.convert %{{.*}} : (i16) -> i64
 !CHECK: %[[TMP19:.*]] = fir.convert %{{.*}} : (i32) -> i64
-!CHECK: omp.wsloop for (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) {
+!CHECK: omp.wsloop {
+!CHECK: omp.loopnest (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) {
 !CHECK: %[[ARG1_I128:.*]] = fir.convert %[[ARG1]] : (i64) -> i128
 !CHECK: fir.store %[[ARG1_I128]] to %[[STORE4:.*]] : !fir.ref
 !CHECK: %[[LOAD4:.*]] = fir.load %[[STORE4]] : !fir.ref
@@ -97,7 +100,8 @@ end program wsloop_variable
 !CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref
 !CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_8]] : (i8) -> i32
 !CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_9]] : (i16) -> i32
-!CHECK: omp.wsloop for
(%[[ARG0:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE_IV:.*]] : !fir.ref !CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_0]] : !fir.ref @@ -146,7 +150,8 @@ subroutine wsloop_variable_sub !CHECK: %[[C1:.*]] = arith.constant 1 : i32 !CHECK: %[[C10:.*]] = arith.constant 10 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[C1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[C1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) { !CHECK: %[[ARG0_I8:.*]] = fir.convert %[[ARG0]] : (i32) -> i8 !CHECK: fir.store %[[ARG0_I8]] to %[[IV2]] : !fir.ref !CHECK: %[[IV2LOAD:.*]] = fir.load %[[IV2]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop.f90 b/flang/test/Lower/OpenMP/FIR/wsloop.f90 index abc0489b08ff5..66972c73d2eb0 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop.f90 @@ -11,7 +11,8 @@ subroutine simple_loop ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref @@ -34,7 +35,8 @@ subroutine simple_loop_with_step ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 2 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref !$OMP DO @@ -57,7 +59,8 @@ subroutine loop_with_schedule_nowait ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop schedule(runtime) nowait { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO SCHEDULE(runtime) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 index 5c624d31b5f36..4c084d81ffa89 100644 --- a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 +++ b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 @@ -12,7 +12,8 @@ ! CHECK: %[[const_1:.*]] = arith.constant 1 : i32 ! CHECK: %[[const_2:.*]] = arith.constant 10 : i32 ! CHECK: %[[const_3:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[ARG:.*]]) : i32 = (%[[const_1]]) to (%[[const_2]]) inclusive step (%[[const_3]]) { +! CHECK: omp.wsloop { +! 
CHECK: omp.loopnest (%[[ARG:.*]]) : i32 = (%[[const_1]]) to (%[[const_2]]) inclusive step (%[[const_3]]) { ! CHECK: fir.store %[[ARG]] to %[[TEMP]] : !fir.ref ! EXPECTED: %[[temp_1:.*]] = fir.load %[[PRIVATE_Z]] : !fir.ref ! CHECK: %[[temp_1:.*]] = fir.load %{{.*}} : !fir.ref diff --git a/flang/test/Lower/OpenMP/default-clause.f90 b/flang/test/Lower/OpenMP/default-clause.f90 index 0e118742689d6..401ed4f612467 100644 --- a/flang/test/Lower/OpenMP/default-clause.f90 +++ b/flang/test/Lower/OpenMP/default-clause.f90 @@ -192,7 +192,7 @@ subroutine nested_default_clause_tests !CHECK: %[[INNER_PRIVATE_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFnested_default_clause_testsEk"} !CHECK: %[[INNER_PRIVATE_K_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_K]] {uniq_name = "_QFnested_default_clause_testsEk"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_K_DECL]]#0 : !fir.ref -!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_K_DECL]]#0 temporary_lhs : i32, !fir.ref +!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_K_DECL]]#0 temporary_lhs : i32, !fir.ref !CHECK: %[[CONST:.*]] = arith.constant 30 : i32 !CHECK: hlfir.assign %[[CONST]] to %[[PRIVATE_Y_DECL]]#0 : i32, !fir.ref !CHECK: %[[CONST:.*]] = arith.constant 40 : i32 @@ -205,21 +205,21 @@ subroutine nested_default_clause_tests !CHECK: } !CHECK: omp.terminator !CHECK: } - !$omp parallel firstprivate(x) private(y) shared(w) default(private) + !$omp parallel firstprivate(x) private(y) shared(w) default(private) !$omp parallel default(private) y = 20 - x = 10 - !$omp end parallel + x = 10 + !$omp end parallel - !$omp parallel default(firstprivate) shared(y) private(w) + !$omp parallel default(firstprivate) shared(y) private(w) y = 30 - w = 40 + w = 40 z = 50 k = 40 !$omp end parallel !$omp end parallel - - + + !CHECK: omp.parallel { !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"} @@ -260,8 +260,8 @@ subroutine nested_default_clause_tests !$omp parallel default(private) shared(z) w = x + z !$omp end parallel - !$omp end parallel - + !$omp end parallel + !CHECK: omp.parallel { !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"} !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -319,7 +319,7 @@ subroutine nested_default_clause_tests !CHECK: omp.terminator !CHECK: } !CHECK: return -!CHECK: } +!CHECK: } !$omp parallel default(firstprivate) !$omp single x = y @@ -352,7 +352,8 @@ subroutine skipped_default_clause_checks() type(it)::iii !CHECK: omp.parallel { -!CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref) for (%[[ARG:.*]]) {{.*}} { +!CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest (%[[ARG:.*]]) {{.*}} { !CHECK: omp.yield !CHECK: } !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/hlfir-wsloop.f90 b/flang/test/Lower/OpenMP/hlfir-wsloop.f90 index b6be77fe3016d..307be89fb5ecb 100644 --- a/flang/test/Lower/OpenMP/hlfir-wsloop.f90 +++ b/flang/test/Lower/OpenMP/hlfir-wsloop.f90 @@ -12,7 +12,8 @@ subroutine simple_loop !$OMP PARALLEL ! CHECK-DAG: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} ! 
CHECK: %[[IV:.*]] = fir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref) -> !fir.ref - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_ST]]) to (%[[WS_END]]) inclusive step (%[[WS_ST]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_ST]]) to (%[[WS_END]]) inclusive step (%[[WS_ST]]) !$OMP DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV:.*]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 index a11bdee156637..45450a5a8fd9b 100644 --- a/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 +++ b/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 @@ -15,7 +15,8 @@ !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X_REF]] {uniq_name = "_QFlastprivate_commonEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[PRIVATE_Y_REF:.*]] = fir.alloca f32 {bindc_name = "y", pinned, uniq_name = "_QFlastprivate_commonEy"} !CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y_REF]] {uniq_name = "_QFlastprivate_commonEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { !CHECK: %[[V:.*]] = arith.addi %[[I]], %{{.*}} : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 !CHECK: %[[NEG_STEP:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32 diff --git a/flang/test/Lower/OpenMP/lastprivate-iv.f90 b/flang/test/Lower/OpenMP/lastprivate-iv.f90 index 70fe500129d12..16204ed79b019 100644 --- a/flang/test/Lower/OpenMP/lastprivate-iv.f90 +++ b/flang/test/Lower/OpenMP/lastprivate-iv.f90 @@ -9,7 +9,8 @@ !CHECK: %[[LB:.*]] = arith.constant 4 : i32 !CHECK: %[[UB:.*]] = arith.constant 10 : i32 !CHECK: %[[STEP:.*]] = arith.constant 3 : i32 -!CHECK: omp.wsloop for (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { !CHECK: fir.store %[[IV]] to %[[I]]#1 : !fir.ref !CHECK: %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 @@ -41,7 +42,8 @@ subroutine lastprivate_iv_inc() !CHECK: %[[LB:.*]] = arith.constant 10 : i32 !CHECK: %[[UB:.*]] = arith.constant 1 : i32 !CHECK: %[[STEP:.*]] = arith.constant -3 : i32 -!CHECK: omp.wsloop for (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { !CHECK: fir.store %[[IV]] to %[[I]]#1 : !fir.ref !CHECK: %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 diff --git a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 index 28f59c95d60bb..cf4f028987022 100644 --- a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 +++ b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 @@ -14,13 +14,14 @@ !CHECK-DAG: %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] typeparams %[[FIVE]] {uniq_name = "_QFlastprivate_characterEarg1"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) ! 
Check that we are accessing the clone inside the loop -!CHECK-DAG: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK-DAG: omp.wsloop { +!CHECK-DAG: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !CHECK-DAG: %[[UNIT:.*]] = arith.constant 6 : i32 !CHECK-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQclX -!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] +!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] !CHECK-NEXT: %[[CNST:.*]] = arith.constant !CHECK-NEXT: %[[CALL_BEGIN_IO:.*]] = fir.call @_FortranAioBeginExternalListOutput(%[[UNIT]], %[[CVT0]], %[[CNST]]) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref -!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT_DECL]]#1 +!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT_DECL]]#1 !CHECK-NEXT: %[[CVT_0_2:.*]] = fir.convert %[[FIVE]] !CHECK-NEXT: %[[CALL_OP_ASCII:.*]] = fir.call @_FortranAioOutputAscii(%[[CALL_BEGIN_IO]], %[[CVT_0_1]], %[[CVT_0_2]]) !CHECK-NEXT: %[[CALL_END_IO:.*]] = fir.call @_FortranAioEndIoStatement(%[[CALL_BEGIN_IO]]) @@ -37,12 +38,12 @@ ! Testing lastprivate val update !CHECK-DAG: hlfir.assign %[[ARG1_PVT_DECL]]#0 to %[[ARG1_DECL]]#0 temporary_lhs : !fir.ref>, !fir.ref> -!CHECK-DAG: } +!CHECK-DAG: } !CHECK-DAG: omp.yield subroutine lastprivate_character(arg1) character(5) :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1(n:n) = 'c' @@ -57,7 +58,8 @@ subroutine lastprivate_character(arg1) !CHECK-DAG: omp.parallel { !CHECK-DAG: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "arg1" !CHECK-DAG: %[[CLONE_DECL:.*]]:2 = hlfir.declare %[[CLONE]] {uniq_name = "_QFlastprivate_intEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -77,7 +79,7 @@ subroutine lastprivate_character(arg1) subroutine lastprivate_int(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 @@ -96,7 +98,8 @@ subroutine lastprivate_int(arg1) !CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_intEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK-DAG: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFmult_lastprivate_intEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! 
Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -117,7 +120,7 @@ subroutine lastprivate_int(arg1) subroutine mult_lastprivate_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -137,7 +140,8 @@ subroutine mult_lastprivate_int(arg1, arg2) !CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_int2Earg1"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK-DAG: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFmult_lastprivate_int2Earg2"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -158,7 +162,7 @@ subroutine mult_lastprivate_int(arg1, arg2) subroutine mult_lastprivate_int2(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1, arg2) do n = 1, 5 arg1 = 2 @@ -183,7 +187,8 @@ subroutine mult_lastprivate_int2(arg1, arg2) !CHECK: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFfirstpriv_lastpriv_intEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK-NOT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -202,7 +207,7 @@ subroutine mult_lastprivate_int2(arg1, arg2) subroutine firstpriv_lastpriv_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -223,7 +228,8 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) !CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref !CHECK-NEXT: hlfir.assign %[[FPV_LD]] to %[[CLONE1_DECL]]#0 temporary_lhs : i32, !fir.ref !CHECK-NEXT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 @@ -241,7 +247,7 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) subroutine firstpriv_lastpriv_int2(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 diff --git a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 index 8533106b7ac48..53846bef2d4f9 100644 --- a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 +++ b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 @@ -21,7 +21,8 @@ ! CHECK: %[[ONE:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[GAMA_DECL]]#0 : !fir.ref ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[PRIV_I_DECL]]#1 : !fir.ref ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! 
CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index diff --git a/flang/test/Lower/OpenMP/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/parallel-private-clause.f90 index 5578b6710da7c..e870d8f8f0669 100644 --- a/flang/test/Lower/OpenMP/parallel-private-clause.f90 +++ b/flang/test/Lower/OpenMP/parallel-private-clause.f90 @@ -304,7 +304,8 @@ subroutine simple_loop_1 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV_DECL]]#1 : !fir.ref @@ -342,7 +343,8 @@ subroutine simple_loop_2 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV_DECL]]#1 : !fir.ref @@ -379,7 +381,8 @@ subroutine simple_loop_3 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV_DECL:.*]]#1 : !fir.ref diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 index 716a7d71bb628..9eb05978b2d4a 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 @@ -3,7 +3,7 @@ ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s -! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) +! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) subroutine omp_do_firstprivate(a) ! CHECK: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_do_firstprivateEa"} : (!fir.ref) -> (!fir.ref, !fir.ref) integer::a @@ -20,7 +20,8 @@ subroutine omp_do_firstprivate(a) ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK-NEXT: omp.wsloop for (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: omp.wsloop { + ! CHECK-NEXT: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK-NEXT: fir.store %[[ARG1]] to %[[I_PVT_DECL]]#1 : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK-NEXT: omp.yield @@ -32,7 +33,7 @@ subroutine omp_do_firstprivate(a) call bar(a) end subroutine omp_do_firstprivate -! CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) +! 
CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) subroutine omp_do_firstprivate2(a, n) ! CHECK: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_do_firstprivate2Ea"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFomp_do_firstprivate2En"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -56,7 +57,8 @@ subroutine omp_do_firstprivate2(a, n) ! CHECK: %[[LB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref ! CHECK: %[[UB:.*]] = fir.load %[[N_PVT_DECL]]#0 : !fir.ref ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK: fir.store %[[ARG2]] to %[[I_PVT_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/parallel-wsloop.f90 index c06f941b74b58..fcb2791554ab0 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop.f90 @@ -9,7 +9,8 @@ subroutine simple_parallel_do ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -37,7 +38,8 @@ subroutine parallel_do_with_parallel_clauses(cond, nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -61,7 +63,8 @@ subroutine parallel_do_with_clauses(nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop schedule(dynamic) { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -97,7 +100,8 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt) do i=1, 9 ! 
CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -150,7 +154,8 @@ end subroutine parallel_private_do ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -196,7 +201,8 @@ end subroutine omp_parallel_multiple_firstprivate_do ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield @@ -241,7 +247,8 @@ end subroutine parallel_do_private ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_PRIV_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -287,7 +294,8 @@ end subroutine omp_parallel_do_multiple_firstprivate ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 b/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 index fdbabc21b2c9e..fd80c563a8f4f 100644 --- a/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 +++ b/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 @@ -82,7 +82,8 @@ subroutine test_stop_in_region3() ! CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[VAL_0_DECL]]#1 : !fir.ref ! CHECK: cf.br ^bb1 ! 
CHECK: ^bb1: diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90 index 43598aff08bfe..e1c2a1f0966f5 100644 --- a/flang/test/Lower/OpenMP/target.f90 +++ b/flang/test/Lower/OpenMP/target.f90 @@ -594,7 +594,8 @@ subroutine omp_target_parallel_do !$omp target parallel do map(tofrom: a) !CHECK: %[[I_PVT_ALLOCA:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_ALLOCA]] {uniq_name = "_QFomp_target_parallel_doEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) - !CHECK: omp.wsloop for (%[[I_VAL:.*]]) : i32 + !CHECK: omp.wsloop { + !CHECK: omp.loopnest (%[[I_VAL:.*]]) : i32 do i = 1, 1024 !CHECK: fir.store %[[I_VAL]] to %[[I_PVT_DECL]]#1 : !fir.ref !CHECK: %[[C10:.*]] = arith.constant 10 : i32 diff --git a/flang/test/Lower/OpenMP/unstructured.f90 b/flang/test/Lower/OpenMP/unstructured.f90 index e5bf980ce90fd..13d3c2bf04206 100644 --- a/flang/test/Lower/OpenMP/unstructured.f90 +++ b/flang/test/Lower/OpenMP/unstructured.f90 @@ -70,14 +70,16 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct ! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 ! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 ! CHECK: ^bb2: // pred: ^bb1 -! CHECK: omp.wsloop for (%[[ARG1:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG1]] to %[[OMP_LOOP_K_DECL]]#1 : !fir.ref ! CHECK: @_FortranAioBeginExternalListOutput ! CHECK: %[[LOAD_1:.*]] = fir.load %[[OMP_LOOP_K_DECL]]#0 : !fir.ref ! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]]) ! CHECK: omp.yield ! CHECK: } -! CHECK: omp.wsloop for (%[[ARG2:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG2]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref ! CHECK: br ^bb1 ! CHECK: ^bb2: // 2 preds: ^bb1, ^bb5 @@ -121,7 +123,8 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs ! CHECK: omp.parallel { ! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: %[[OMP_LOOP_J_DECL:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFss4Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: omp.wsloop for (%[[ARG:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref ! CHECK: %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}} ! CHECK: %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}} diff --git a/flang/test/Lower/OpenMP/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/wsloop-chunks.f90 index 5016c8985bda0..2fe787150de32 100644 --- a/flang/test/Lower/OpenMP/wsloop-chunks.f90 +++ b/flang/test/Lower/OpenMP/wsloop-chunks.f90 @@ -20,7 +20,8 @@ program wsloop ! CHECK: %[[VAL_3:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait for (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV:.*]]#1 : !fir.ref ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]]#0 : !fir.ref ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 @@ -38,7 +39,8 @@ program wsloop ! CHECK: %[[VAL_15:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! 
CHECK: %[[VAL_17:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait for (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref ! CHECK: %[[VAL_24:.*]] = arith.constant 2 : i32 ! CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]]#0 : !fir.ref @@ -46,7 +48,7 @@ program wsloop ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: omp.yield ! CHECK: } - + end do !$OMP END DO NOWAIT chunk = 6 @@ -62,7 +64,8 @@ program wsloop ! CHECK: %[[VAL_30:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_31:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_0]]#0 : !fir.ref -! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait for (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { ! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]]#1 : !fir.ref ! CHECK: %[[VAL_39:.*]] = arith.constant 3 : i32 ! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/wsloop-collapse.f90 index c93fcf4ef968d..85e1134e68720 100644 --- a/flang/test/Lower/OpenMP/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/wsloop-collapse.f90 @@ -49,7 +49,8 @@ program wsloop_collapse !CHECK: %[[VAL_30:.*]] = arith.constant 1 : i32 !CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref !CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) { !$omp do collapse(3) do i = 1, a do j= 1, b diff --git a/flang/test/Lower/OpenMP/wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/wsloop-monotonic.f90 index fba9105b98181..5d0cfb337bb6e 100644 --- a/flang/test/Lower/OpenMP/wsloop-monotonic.f90 +++ b/flang/test/Lower/OpenMP/wsloop-monotonic.f90 @@ -15,7 +15,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]]#1 : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 index 1bd7a2edc0f52..024c4ebf433fb 100644 --- a/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 +++ b/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 @@ -17,7 +17,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : 
i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV]]#1 : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/wsloop-ordered.f90 b/flang/test/Lower/OpenMP/wsloop-ordered.f90 index 5185d2d085bac..5483b9228e035 100644 --- a/flang/test/Lower/OpenMP/wsloop-ordered.f90 +++ b/flang/test/Lower/OpenMP/wsloop-ordered.f90 @@ -6,7 +6,8 @@ subroutine wsloop_ordered_no_para() integer :: a(10), i -! CHECK: omp.wsloop ordered(0) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(0) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } @@ -25,7 +26,8 @@ subroutine wsloop_ordered_with_para() integer :: a(10), i ! CHECK: func @_QPwsloop_ordered_with_para() { -! CHECK: omp.wsloop ordered(1) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(1) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 index 4d30282fc8c21..bd9f68ecc7e4b 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 @@ -27,7 +27,8 @@ ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 index 7df4f37b98df8..a8f4e50f6a51c 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 @@ -68,7 +68,8 @@ ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -108,7 +109,8 @@ subroutine simple_int_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -149,7 +151,8 @@ subroutine simple_real_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -188,7 +191,8 @@ subroutine simple_int_reduction_switch_order ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -236,7 +240,8 @@ subroutine simple_real_reduction_switch_order ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! 
CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -297,7 +302,8 @@ subroutine multiple_int_reductions_same_type ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -365,7 +371,8 @@ subroutine multiple_real_reductions_same_type ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_17:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) for (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { ! CHECK: fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref ! CHECK: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 index 9588531f6c909..6af928f6c9fb9 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 @@ -29,7 +29,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@iand_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@iand_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! 
CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 index a14a37101874c..8a7d9366cf387 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 @@ -19,7 +19,8 @@ !CHECK: omp.parallel !CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_ieorEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref !CHECK: %[[PRV_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 index 3b5e327439358..8f28916724267 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 @@ -29,7 +29,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@ior_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) +! CHECK: omp.wsloop reduction(@ior_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 index 17d321620cca8..4303f15990ecc 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -84,7 +85,8 @@ end subroutine simple_reduction ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! 
CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -141,7 +143,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 index 8204e4c878cb0..994477f4edcec 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -83,7 +84,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! 
CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -140,7 +142,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 index 623368a50e864..29849b68be572 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -84,7 +85,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -143,7 +145,8 @@ subroutine simple_reduction_switch_order(y) ! 
CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 index f1ae1bc687cd5..719677ca729f2 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -83,7 +84,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -140,7 +142,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! 
CHECK: omp.wsloop reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 index ed25cedae90c6..45ae04f3712ac 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 @@ -29,7 +29,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 index ea3b1bebce038..09856f66b1e2c 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 @@ -40,7 +40,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -69,7 +70,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -88,7 +90,8 @@ ! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_33:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_34:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) for (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { +! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { ! CHECK: fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref ! CHECK: %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 index 3aa9001869dc5..eaac48b9e1b5a 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 @@ -40,7 +40,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -69,7 +70,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -90,7 +92,8 @@ ! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_33:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_34:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) for (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { +! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { ! CHECK: fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref ! CHECK: %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 index 4774fba3f33e9..4656960a45ba2 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 @@ -61,7 +61,8 @@ ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -97,7 +98,8 @@ subroutine simple_int_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -134,7 +136,8 @@ subroutine simple_real_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -170,7 +173,8 @@ subroutine simple_int_reduction_switch_order ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! 
CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -215,7 +219,8 @@ subroutine simple_real_reduction_switch_order ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -273,7 +278,8 @@ subroutine multiple_int_reductions_same_type ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -338,7 +344,8 @@ subroutine multiple_real_reductions_same_type ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_17:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @multiply_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @multiply_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) for (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { +! 
CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @multiply_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @multiply_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { ! CHECK: fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref ! CHECK: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Lower/OpenMP/wsloop-simd.f90 b/flang/test/Lower/OpenMP/wsloop-simd.f90 index c3d5e3e0cda59..490f1665a7b1b 100644 --- a/flang/test/Lower/OpenMP/wsloop-simd.f90 +++ b/flang/test/Lower/OpenMP/wsloop-simd.f90 @@ -14,7 +14,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(runtime, simd) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(runtime, simd) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[STORE:.*]]#1 : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/wsloop-unstructured.f90 b/flang/test/Lower/OpenMP/wsloop-unstructured.f90 index 7fe63a1fe607c..f5c1a8126edc1 100644 --- a/flang/test/Lower/OpenMP/wsloop-unstructured.f90 +++ b/flang/test/Lower/OpenMP/wsloop-unstructured.f90 @@ -29,7 +29,8 @@ end subroutine sub ! CHECK-SAME: %[[VAL_2:.*]]: !fir.ref> {fir.bindc_name = "x"}, ! CHECK-SAME: %[[VAL_3:.*]]: !fir.ref> {fir.bindc_name = "y"}) { ! [...] -! CHECK: omp.wsloop for (%[[VAL_53:.*]], %[[VAL_54:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_53:.*]], %[[VAL_54:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) { ! [...] ! CHECK: cf.br ^bb1 ! 
CHECK: ^bb1: diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90 index b3758f1fdc00f..e2a71e3d1b888 100644 --- a/flang/test/Lower/OpenMP/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/wsloop-variable.f90 @@ -22,7 +22,8 @@ program wsloop_variable !CHECK: %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64 !CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64 !CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64 -!CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]]#1 : !fir.ref !CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref @@ -46,7 +47,8 @@ program wsloop_variable !CHECK: %[[TMP12:.*]] = arith.constant 1 : i32 !CHECK: %[[TMP13:.*]] = fir.convert %{{.*}} : (i8) -> i32 !CHECK: %[[TMP14:.*]] = fir.convert %{{.*}} : (i64) -> i32 -!CHECK: omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE3:.*]]#1 : !fir.ref !CHECK: %[[LOAD3:.*]] = fir.load %[[STORE3]]#0 : !fir.ref @@ -64,7 +66,8 @@ program wsloop_variable !CHECK: %[[TMP17:.*]] = fir.convert %{{.*}} : (i8) -> i64 !CHECK: %[[TMP18:.*]] = fir.convert %{{.*}} : (i16) -> i64 !CHECK: %[[TMP19:.*]] = fir.convert %{{.*}} : (i32) -> i64 -!CHECK: omp.wsloop for (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) { !CHECK: %[[ARG1_I128:.*]] = fir.convert %[[ARG1]] : (i64) -> i128 !CHECK: fir.store %[[ARG1_I128]] to %[[STORE4:.*]]#1 : !fir.ref !CHECK: %[[LOAD4:.*]] = fir.load %[[STORE4]]#0 : !fir.ref @@ -118,7 +121,8 @@ subroutine wsloop_variable_sub !CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref !CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_23]] : (i8) -> i32 !CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_24]] : (i16) -> i32 -!CHECK: omp.wsloop for (%[[VAL_27:.*]]) : i32 = (%[[VAL_22]]) to (%[[VAL_25]]) inclusive step (%[[VAL_26]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[VAL_27:.*]]) : i32 = (%[[VAL_22]]) to (%[[VAL_25]]) inclusive step (%[[VAL_26]]) { !CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> i16 !CHECK: fir.store %[[VAL_28]] to %[[VAL_3]]#1 : !fir.ref !CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref @@ -160,7 +164,8 @@ subroutine wsloop_variable_sub !CHECK: %[[VAL_50:.*]] = arith.constant 1 : i32 !CHECK: %[[VAL_51:.*]] = arith.constant 10 : i32 !CHECK: %[[VAL_52:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[VAL_53:.*]]) : i32 = (%[[VAL_50]]) to (%[[VAL_51]]) inclusive step (%[[VAL_52]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[VAL_53:.*]]) : i32 = (%[[VAL_50]]) to (%[[VAL_51]]) inclusive step (%[[VAL_52]]) { !CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_53]] : (i32) -> i8 !CHECK: fir.store %[[VAL_54]] to %[[VAL_1]]#1 : !fir.ref !CHECK: %[[VAL_55:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref diff --git 
a/flang/test/Lower/OpenMP/wsloop.f90 b/flang/test/Lower/OpenMP/wsloop.f90 index 4068f715c3e18..71cd0ddc2763b 100644 --- a/flang/test/Lower/OpenMP/wsloop.f90 +++ b/flang/test/Lower/OpenMP/wsloop.f90 @@ -12,7 +12,8 @@ subroutine simple_loop ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) { !$OMP DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_DECL:.*]]#1 : !fir.ref @@ -36,7 +37,8 @@ subroutine simple_loop_with_step ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 2 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) { ! CHECK: fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref !$OMP DO @@ -60,7 +62,8 @@ subroutine loop_with_schedule_nowait ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop schedule(runtime) nowait { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) { !$OMP DO SCHEDULE(runtime) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref diff --git a/flang/test/Transforms/DoConcurrent/basic.f90 b/flang/test/Transforms/DoConcurrent/basic.f90 index a555a25c9bad5..248223d72ff11 100644 --- a/flang/test/Transforms/DoConcurrent/basic.f90 +++ b/flang/test/Transforms/DoConcurrent/basic.f90 @@ -23,7 +23,8 @@ program do_concurrent_basic ! CHECK: %[[UB:.*]] = fir.convert %[[C10]] : (i32) -> index ! CHECK: %[[STEP:.*]] = arith.constant 1 : index - ! CHECK: omp.wsloop for (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { ! CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32 ! CHECK-NEXT: fir.store %[[IV_IDX]] to %[[BINDING]]#1 : !fir.ref ! 
CHECK-NEXT: %[[IV_VAL1:.*]] = fir.load %[[BINDING]]#0 : !fir.ref diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index cf3bc9683cc89..7e6a590d10bb0 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -1204,7 +1204,7 @@ def TaskGroupOp : OpenMP_Op<"taskgroup", [AttrSizedOperandSegments, // 2.10.4 taskyield Construct //===----------------------------------------------------------------------===// -def TaskyieldOp : OpenMP_Op<"taskyield"> { +def TaskYieldOp : OpenMP_Op<"taskyield"> { let summary = "taskyield construct"; let description = [{ The taskyield construct specifies that the current task can be suspended @@ -1723,6 +1723,10 @@ def TargetOp : OpenMP_Op<"target",[IsolatedFromAbove, MapClauseOwningOpInterface TODO: is_device_ptr, defaultmap, in_reduction }]; + // TODO Remove num_teams_lower, num_teams_upper, teams_thread_limit and + // num_threads args and instead calculate them outside and implicitly map + // them. If not used and implicitly mapped, they can be omitted from the + // outlined function arg list. let arguments = (ins Optional:$if_expr, Optional:$device, Optional:$thread_limit, @@ -1914,7 +1918,7 @@ def OrderedRegionOp : OpenMP_Op<"ordered_region"> { // 2.17.5 taskwait Construct //===----------------------------------------------------------------------===// -def TaskwaitOp : OpenMP_Op<"taskwait"> { +def TaskWaitOp : OpenMP_Op<"taskwait"> { let summary = "taskwait construct"; let description = [{ The taskwait construct specifies a wait on the completion of child tasks diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp index 83eb1653ca950..117db5c39db59 100644 --- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp +++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp @@ -319,8 +319,8 @@ void ConvertOpenMPToLLVMPass::runOnOperation() { populateOpenMPToLLVMConversionPatterns(converter, patterns); LLVMConversionTarget target(getContext()); - target.addLegalOp(); + target.addLegalOp(); configureOpenMPToLLVMConversionLegality(target, converter); if (failed(applyPartialConversion(module, target, std::move(patterns)))) signalPassFailure(); diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index 464a647564ace..7bebd874f47b4 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -461,7 +461,11 @@ struct ParallelOpLowering : public OpRewritePattern { // Replace the loop. { OpBuilder::InsertionGuard allocaGuard(rewriter); - auto loop = rewriter.create( + // TODO Test that this didn't break something. 
+ auto wsloop = rewriter.create(parallelOp.getLoc()); + rewriter.createBlock(&wsloop.getRegion()); + + auto loop = rewriter.create( parallelOp.getLoc(), parallelOp.getLowerBound(), parallelOp.getUpperBound(), parallelOp.getStep()); rewriter.create(loc); @@ -482,9 +486,9 @@ struct ParallelOpLowering : public OpRewritePattern { rewriter.setInsertionPointToEnd(&*scope.getBodyRegion().begin()); rewriter.create(loc, ValueRange()); if (!reductionVariables.empty()) { - loop.setReductionsAttr( + wsloop.setReductionsAttr( ArrayAttr::get(rewriter.getContext(), reductionDeclSymbols)); - loop.getReductionVarsMutable().append(reductionVariables); + wsloop.getReductionVarsMutable().append(reductionVariables); } } } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 3605e03ae886e..7e5e3c47e0d03 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -893,31 +893,32 @@ static LogicalResult convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - auto loop = cast(opInst); + auto wsloop = cast(opInst); + auto loop = cast(wsloop.getWrappedLoop()); // TODO: this should be in the op verifier instead. if (loop.getLowerBound().empty()) return failure(); // Static is the default. auto schedule = - loop.getScheduleVal().value_or(omp::ClauseScheduleKind::Static); + wsloop.getScheduleVal().value_or(omp::ClauseScheduleKind::Static); // Find the loop configuration. llvm::Value *step = moduleTranslation.lookupValue(loop.getStep()[0]); llvm::Type *ivType = step->getType(); llvm::Value *chunk = nullptr; - if (loop.getScheduleChunkVar()) { + if (wsloop.getScheduleChunkVar()) { llvm::Value *chunkVar = - moduleTranslation.lookupValue(loop.getScheduleChunkVar()); + moduleTranslation.lookupValue(wsloop.getScheduleChunkVar()); chunk = builder.CreateSExtOrTrunc(chunkVar, ivType); } SmallVector reductionDecls; - collectReductionDecls(loop, reductionDecls); + collectReductionDecls(wsloop, reductionDecls); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); DenseMap reductionVariableMap; - allocReductionVars(loop, builder, *ompBuilder, moduleTranslation, allocaIP, + allocReductionVars(wsloop, builder, *ompBuilder, moduleTranslation, allocaIP, reductionDecls, reductionVariableMap); // Store the mapping between reduction variables and their private copies on @@ -929,7 +930,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, // Before the loop, store the initial values of reductions into reduction // variables.
Although this could be done after allocas, we don't want to mess // up with the alloca insertion point. - for (unsigned i = 0; i < loop.getNumReductionVars(); ++i) { + for (unsigned i = 0; i < wsloop.getNumReductionVars(); ++i) { SmallVector phis; if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral", builder, @@ -1020,10 +1021,10 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, allocaIP = findAllocaInsertPoint(builder, moduleTranslation); // TODO: Handle doacross loops when the ordered clause has a parameter. - bool isOrdered = loop.getOrderedVal().has_value(); + bool isOrdered = wsloop.getOrderedVal().has_value(); std::optional scheduleModifier = - loop.getScheduleModifier(); - bool isSimd = loop.getSimdModifier(); + wsloop.getScheduleModifier(); + bool isSimd = wsloop.getSimdModifier(); bool distributeCodeGen = opInst.getParentOfType(); bool parallelCodeGen = opInst.getParentOfType(); @@ -1036,7 +1037,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, workshareLoopType = llvm::omp::WorksharingLoopType::ForStaticLoop; } ompBuilder->applyWorkshareLoop( - ompLoc.DL, loopInfo, allocaIP, !loop.getNowait(), + ompLoc.DL, loopInfo, allocaIP, !wsloop.getNowait(), convertToScheduleKind(schedule), chunk, isSimd, scheduleModifier == omp::ScheduleModifier::monotonic, scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered, @@ -1049,12 +1050,12 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, builder.restoreIP(afterIP); // Process the reductions if required. - if (loop.getNumReductionVars() == 0) + if (wsloop.getNumReductionVars() == 0) return success(); // Create the reduction generators. We need to own them here because // ReductionInfo only accepts references to the generators. - collectReductionInfo(loop, builder, *ompBuilder, moduleTranslation, + collectReductionInfo(wsloop, builder, *ompBuilder, moduleTranslation, reductionDecls); // The call to createReductions below expects the block to have a // terminator. 
Create an unreachable instruction to serve as terminator @@ -1063,12 +1064,12 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, builder.SetInsertPoint(tempTerminator); llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = - ompBuilder->createReductions(builder.saveIP(), allocaIP, - ompBuilder->RIManager.getReductionInfos(), - loop.getNowait(), /*IsTeamsReduction*/ false, - /*HasDistribute*/ distributeCodeGen); + ompBuilder->createReductions( + builder.saveIP(), allocaIP, ompBuilder->RIManager.getReductionInfos(), + wsloop.getNowait(), /*IsTeamsReduction*/ false, + /*HasDistribute*/ distributeCodeGen); if (!contInsertPoint.getBlock()) - return loop->emitOpError() << "failed to convert reductions"; + return wsloop->emitOpError() << "failed to convert reductions"; auto nextInsertionPoint = ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); tempTerminator->eraseFromParent(); @@ -1331,7 +1332,8 @@ convertOmpParallel(Operation &opInst1, llvm::IRBuilderBase &builder, static LogicalResult convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { - auto loop = cast(opInst); + auto simd = cast(opInst); + auto loop = cast(simd.getWrappedLoop()); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); @@ -1410,17 +1412,17 @@ convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); llvm::ConstantInt *simdlen = nullptr; - if (std::optional simdlenVar = loop.getSimdlen()) + if (std::optional simdlenVar = simd.getSimdlen()) simdlen = builder.getInt64(simdlenVar.value()); llvm::ConstantInt *safelen = nullptr; - if (std::optional safelenVar = loop.getSafelen()) + if (std::optional safelenVar = simd.getSafelen()) safelen = builder.getInt64(safelenVar.value()); llvm::MapVector alignedVars; ompBuilder->applySimd( loopInfo, alignedVars, - loop.getIfExpr() ? moduleTranslation.lookupValue(loop.getIfExpr()) + simd.getIfExpr() ? 
moduleTranslation.lookupValue(simd.getIfExpr()) : nullptr, llvm::omp::OrderKind::OMP_ORDER_unknown, simdlen, safelen); @@ -3320,11 +3322,11 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); return success(); }) - .Case([&](omp::TaskwaitOp) { + .Case([&](omp::TaskWaitOp) { ompBuilder->createTaskwait(builder.saveIP()); return success(); }) - .Case([&](omp::TaskyieldOp) { + .Case([&](omp::TaskYieldOp) { ompBuilder->createTaskyield(builder.saveIP()); return success(); }) diff --git a/mlir/test/CAPI/execution_engine.c b/mlir/test/CAPI/execution_engine.c index 38a8fb8c3e213..a3bf38bdfb131 100644 --- a/mlir/test/CAPI/execution_engine.c +++ b/mlir/test/CAPI/execution_engine.c @@ -99,10 +99,13 @@ void testOmpCreation(void) { " %1 = arith.constant 1 : i32 \n" " %2 = arith.constant 2 : i32 \n" " omp.parallel { \n" -" omp.wsloop for (%3) : i32 = (%0) to (%2) step (%1) { \n" +" omp.wsloop { \n" +" omp.loopnest (%3) : i32 = (%0) to (%2) step (%1) { \n" " omp.yield \n" " } \n" " omp.terminator \n" +" } \n" +" omp.terminator \n" " } \n" " llvm.return \n" " } \n" diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir index 6cbc0c8f4be9a..de5dfc1a288d0 100644 --- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir @@ -71,7 +71,8 @@ func.func @branch_loop() { func.func @wsloop(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) { // CHECK: omp.parallel omp.parallel { - // CHECK: omp.wsloop for (%[[ARG6:.*]], %[[ARG7:.*]]) : i64 = (%[[ARG0]], %[[ARG1]]) to (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[ARG5]]) { + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[ARG6:.*]], %[[ARG7:.*]]) : i64 = (%[[ARG0]], %[[ARG1]]) to (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[ARG5]]) { "omp.wsloop"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) ({ ^bb0(%arg6: index, %arg7: index): // CHECK-DAG: %[[CAST_ARG6:.*]] = builtin.unrealized_conversion_cast %[[ARG6]] : i64 to index @@ -320,7 +321,8 @@ llvm.func @_QPsb() { // CHECK-LABEL: @_QPsimple_reduction // CHECK: %[[RED_ACCUMULATOR:.*]] = llvm.alloca %{{.*}} x i32 {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} : (i64) -> !llvm.ptr // CHECK: omp.parallel -// CHECK: omp.wsloop reduction(@eqv_reduction %{{.+}} -> %[[PRV:.+]] : !llvm.ptr) for +// CHECK: omp.wsloop reduction(@eqv_reduction %{{.+}} -> %[[PRV:.+]] : !llvm.ptr) { +// CHECK: omp.loopnest // CHECK: %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> i32 // CHECK: %[[CMP:.+]] = llvm.icmp "eq" %{{.*}}, %[[LPRV]] : i32 // CHECK: %[[ZEXT:.+]] = llvm.zext %[[CMP]] : i1 to i32 @@ -353,7 +355,8 @@ llvm.func @_QPsimple_reduction(%arg0: !llvm.ptr {fir.bindc_name = "y"}) { llvm.store %5, %4 : i32, !llvm.ptr omp.parallel { %6 = llvm.alloca %3 x i32 {adapt.valuebyref, in_type = i32, operandSegmentSizes = array, pinned} : (i64) -> !llvm.ptr - omp.wsloop reduction(@eqv_reduction %4 -> %prv : !llvm.ptr) for (%arg1) : i32 = (%1) to (%0) inclusive step (%1) { + omp.wsloop reduction(@eqv_reduction %4 -> %prv : !llvm.ptr) { + omp.loopnest (%arg1) : i32 = (%1) to (%0) inclusive step (%1) { llvm.store %arg1, %6 : i32, !llvm.ptr %7 = llvm.load %6 : !llvm.ptr -> i32 %8 = llvm.sext %7 : i32 to i64 @@ -367,6 +370,8 @@ llvm.func @_QPsimple_reduction(%arg0: !llvm.ptr {fir.bindc_name = "y"}) { omp.yield } omp.terminator + } + omp.terminator } llvm.return } diff --git 
a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir index acd2690c56e2e..d1e515b8ae813 100644 --- a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir +++ b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir @@ -5,7 +5,8 @@ func.func @parallel(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) { // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { - // CHECK: omp.wsloop for (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { // CHECK: memref.alloca_scope scf.parallel (%i, %j) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { // CHECK: "test.payload"(%[[LVAR1]], %[[LVAR2]]) : (index, index) -> () @@ -23,8 +24,9 @@ func.func @nested_loops(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) { // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { - // CHECK: omp.wsloop for (%[[LVAR_OUT1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { - // CHECK: memref.alloca_scope + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR_OUT1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { + // CHECK: memref.alloca_scope scf.parallel (%i) = (%arg0) to (%arg2) step (%arg4) { // CHECK: omp.parallel // CHECK: omp.wsloop for (%[[LVAR_IN1:.*]]) : index = (%arg1) to (%arg3) step (%arg5) { @@ -47,7 +49,8 @@ func.func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) { // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { - // CHECK: omp.wsloop for (%[[LVAR_AL1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR_AL1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { // CHECK: memref.alloca_scope scf.parallel (%i) = (%arg0) to (%arg2) step (%arg4) { // CHECK: "test.payload1"(%[[LVAR_AL1]]) : (index) -> () @@ -60,7 +63,8 @@ func.func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index, // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { - // CHECK: omp.wsloop for (%[[LVAR_AL2:.*]]) : index = (%arg1) to (%arg3) step (%arg5) { + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR_AL2:.*]]) : index = (%arg1) to (%arg3) step (%arg5) { // CHECK: memref.alloca_scope scf.parallel (%j) = (%arg1) to (%arg3) step (%arg5) { // CHECK: "test.payload2"(%[[LVAR_AL2]]) : (index) -> () diff --git a/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir b/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir index 37720e98d92a9..f48f8152ee022 100644 --- a/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir +++ b/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir @@ -32,7 +32,8 @@ llvm.func @repeated_successor_no_args(%arg0: i1) { // CHECK: @repeated_successor_openmp llvm.func @repeated_successor_openmp(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: i1) { - omp.wsloop for (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) { + omp.wsloop { + omp.loopnest (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) { // CHECK: llvm.cond_br %{{.*}}, ^[[BB1:.*]]({{.*}}), ^[[BB2:.*]]({{.*}}) llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64) // CHECK: ^[[BB1]] @@ -41,6 +42,8 @@ llvm.func 
@repeated_successor_openmp(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: // CHECK: ^[[BB2]](%[[ARG:.*]]: i64): // CHECK: llvm.br ^[[BB1]](%[[ARG]] : i64) } + omp.terminator + } llvm.return } diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 448f37b32fff6..8a8ec498ed76b 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -98,7 +98,7 @@ func.func @invalid_parent(%lb : index, %ub : index, %step : index) { func.func @type_mismatch(%lb : index, %ub : index, %step : index) { // TODO Remove induction variables from omp.wsloop. - omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop { // expected-error@+1 {{range argument type does not match corresponding IV type}} "omp.loopnest" (%lb, %ub, %step) ({ ^bb0(%iv2: i32): @@ -112,7 +112,7 @@ func.func @type_mismatch(%lb : index, %ub : index, %step : index) { func.func @iv_number_mismatch(%lb : index, %ub : index, %step : index) { // TODO Remove induction variables from omp.wsloop. - omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop { // expected-error@+1 {{number of range arguments and IVs do not match}} "omp.loopnest" (%lb, %ub, %step) ({ ^bb0(%iv1 : index, %iv2 : index): @@ -125,7 +125,7 @@ func.func @iv_number_mismatch(%lb : index, %ub : index, %step : index) { // ----- func.func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) { - // expected-error @below {{expected 'for'}} + // expected-error @below {{expected '{' to begin a region}} omp.wsloop nowait inclusive for (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield @@ -145,7 +145,7 @@ func.func @order_value(%lb : index, %ub : index, %step : index) { // ----- func.func @if_not_allowed(%lb : index, %ub : index, %step : index, %bool_var : i1) { - // expected-error @below {{expected 'for'}} + // expected-error @below {{expected '{'}} omp.wsloop if(%bool_var: i1) for (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield @@ -155,7 +155,7 @@ func.func @if_not_allowed(%lb : index, %ub : index, %step : index, %bool_var : i // ----- func.func @num_threads_not_allowed(%lb : index, %ub : index, %step : index, %int_var : i32) { - // expected-error @below {{expected 'for'}} + // expected-error @below {{expected '{'}} omp.wsloop num_threads(%int_var: i32) for (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield @@ -479,12 +479,14 @@ func.func @foo(%lb : index, %ub : index, %step : index) { %1 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr // expected-error @below {{expected symbol reference @foo to point to a reduction declaration}} - omp.wsloop reduction(@foo %0 -> %prv : !llvm.ptr) - for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop reduction(@foo %0 -> %prv : !llvm.ptr) { + omp.loopnest (%iv) : index = (%lb) to (%ub) step (%step) { %2 = arith.constant 2.0 : f32 omp.reduction %2, %1 : f32, !llvm.ptr omp.yield } + omp.terminator + } return } @@ -507,12 +509,14 @@ func.func @foo(%lb : index, %ub : index, %step : index) { %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr // expected-error @below {{accumulator variable used more than once}} - omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr, @add_f32 %0 -> %prv1 : !llvm.ptr) - for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr, @add_f32 %0 -> %prv1 : !llvm.ptr) { + omp.loopnest (%iv) : index = (%lb) to (%ub) step (%step) { %2 = arith.constant 2.0 : f32 omp.reduction %2, %0 : f32, !llvm.ptr omp.yield } + omp.terminator + } return } @@ -540,12 
+544,14 @@ func.func @foo(%lb : index, %ub : index, %step : index, %mem : memref<1xf32>) { %c1 = arith.constant 1 : i32 // expected-error @below {{expected accumulator ('memref<1xf32>') to be the same type as reduction declaration ('!llvm.ptr')}} - omp.wsloop reduction(@add_f32 %mem -> %prv : memref<1xf32>) - for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop reduction(@add_f32 %mem -> %prv : memref<1xf32>) { + omp.loopnest (%iv) : index = (%lb) to (%ub) step (%step) { %2 = arith.constant 2.0 : f32 omp.reduction %2, %mem : f32, memref<1xf32> omp.yield } + omp.terminator + } return } @@ -577,27 +583,32 @@ omp.critical.declare @mutex hint(invalid_hint) // ----- func.func @omp_ordered1(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () { - omp.wsloop ordered(1) - for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { + omp.wsloop ordered(1) { + omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}} omp.ordered_region { omp.terminator } omp.yield } + omp.terminator + } return } // ----- func.func @omp_ordered2(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () { - omp.wsloop for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { + omp.wsloop { + omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}} omp.ordered_region { omp.terminator } omp.yield } + omp.terminator + } return } @@ -612,25 +623,29 @@ func.func @omp_ordered3(%vec0 : i64) -> () { // ----- func.func @omp_ordered4(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64) -> () { - omp.wsloop ordered(0) - for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { + omp.wsloop ordered(0) { + omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { // expected-error @below {{ordered depend directive must be closely nested inside a worksharing-loop with ordered clause with parameter present}} omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64} omp.yield } + omp.terminator + } return } // ----- func.func @omp_ordered5(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64, %vec1 : i64) -> () { - omp.wsloop ordered(1) - for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { + omp.wsloop ordered(1) { + omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { // expected-error @below {{number of variables in depend clause does not match number of iteration variables in the doacross loop}} omp.ordered depend_type(dependsource) depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64} omp.yield } + omp.terminator + } return } @@ -1462,12 +1477,14 @@ func.func @omp_cancel2() { // ----- func.func @omp_cancel3(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () { - omp.wsloop nowait - for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { + omp.wsloop nowait { + omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { // expected-error @below {{A worksharing construct that is canceled must not have a nowait clause}} omp.cancel cancellation_construct_type(loop) // CHECK: omp.terminator omp.terminator + } + omp.terminator } return } @@ -1475,12 +1492,14 @@ func.func @omp_cancel3(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () { // ----- func.func @omp_cancel4(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () { - omp.wsloop ordered(1) - for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) { + omp.wsloop ordered(1) { + omp.loopnest (%0) : i32 = (%arg1) 
to (%arg2) step (%arg3) {
       // expected-error @below {{A worksharing construct that is canceled must not have an ordered clause}}
       omp.cancel cancellation_construct_type(loop)
       // CHECK: omp.terminator
       omp.terminator
+    }
+    omp.terminator
   }
   return
 }
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
index afbf5f2224630..d1758035b8956 100644
--- a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
@@ -12,10 +12,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
     %loop_ub = llvm.mlir.constant(9 : i32) : i32
     %loop_lb = llvm.mlir.constant(0 : i32) : i32
     %loop_step = llvm.mlir.constant(1 : i32) : i32
-    omp.wsloop for (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+    omp.wsloop {
+      omp.loopnest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
       %gep = llvm.getelementptr %arg0[0, %loop_cnt] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32>
       llvm.store %loop_cnt, %gep : i32, !llvm.ptr
       omp.yield
+      }
+      omp.terminator
     }
     omp.terminator
   }
diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
index 435aca32450c2..36fa2261e385c 100644
--- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
@@ -8,7 +8,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
     %loop_ub = llvm.mlir.constant(99 : i32) : i32
     %loop_lb = llvm.mlir.constant(0 : i32) : i32
     %loop_step = llvm.mlir.constant(1 : index) : i32
-    omp.wsloop for (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) {
+    omp.wsloop {
+      omp.loopnest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) {
       %1 = llvm.add %arg1, %arg2 : i32
       %2 = llvm.mul %arg2, %loop_ub overflow : i32
       %3 = llvm.add %arg1, %2 :i32
@@ -16,6 +17,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
       llvm.store %1, %4 : i32, !llvm.ptr
       omp.yield
     }
+      omp.terminator
+    }
     llvm.return
   }
 }
diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
index 4cfb7d4f69514..0f48d45c53a09 100644
--- a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
@@ -8,10 +8,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
     %loop_ub = llvm.mlir.constant(9 : i32) : i32
     %loop_lb = llvm.mlir.constant(0 : i32) : i32
     %loop_step = llvm.mlir.constant(1 : i32) : i32
-    omp.wsloop for (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+    omp.wsloop {
+      omp.loopnest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
       %gep = llvm.getelementptr %arg0[0, %loop_cnt] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32>
       llvm.store %loop_cnt, %gep : i32, !llvm.ptr
       omp.yield
+      }
+      omp.terminator
     }
     llvm.return
   }
@@ -20,8 +23,11 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
     %loop_ub = llvm.mlir.constant(9 : i32) : i32
     %loop_lb = llvm.mlir.constant(0 : i32) : i32
     %loop_step = llvm.mlir.constant(1 : i32) : i32
-    omp.wsloop for (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+    omp.wsloop {
+      omp.loopnest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
       omp.yield
+      }
+      omp.terminator
     }
     llvm.return
   }
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 3571526c078a5..a7393c47e1c5d 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -320,7 +320,8 @@ llvm.func @wsloop_simple(%arg0: !llvm.ptr) {
   %1 = llvm.mlir.constant(10 : index) : i64
   %2 = llvm.mlir.constant(1 : index) : i64
   omp.parallel {
-    "omp.wsloop"(%1, %0, %2) ({
+    omp.wsloop {
+      "omp.loopnest" (%1, %0, %2) ({
       ^bb0(%arg1: i64):
         // The form of the emitted IR is controlled by OpenMPIRBuilder and
         // tested there. Just check that the right functions are called.
@@ -334,6 +335,8 @@ llvm.func @wsloop_simple(%arg0: !llvm.ptr) {
       }) {operandSegmentSizes = array} : (i64, i64, i64) -> ()
       omp.terminator
     }
+    omp.terminator
+  }
   llvm.return
 }
@@ -345,13 +348,16 @@ llvm.func @wsloop_inclusive_1(%arg0: !llvm.ptr) {
   %1 = llvm.mlir.constant(10 : index) : i64
   %2 = llvm.mlir.constant(1 : index) : i64
   // CHECK: store i64 31, ptr %{{.*}}upperbound
-  "omp.wsloop"(%1, %0, %2) ({
+  omp.wsloop {
+    "omp.loopnest"(%1, %0, %2) ({
     ^bb0(%arg1: i64):
       %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
       %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
       llvm.store %3, %4 : f32, !llvm.ptr
       omp.yield
   }) {operandSegmentSizes = array} : (i64, i64, i64) -> ()
+    omp.terminator
+  }
   llvm.return
 }
@@ -363,13 +369,16 @@ llvm.func @wsloop_inclusive_2(%arg0: !llvm.ptr) {
   %1 = llvm.mlir.constant(10 : index) : i64
   %2 = llvm.mlir.constant(1 : index) : i64
   // CHECK: store i64 32, ptr %{{.*}}upperbound
-  "omp.wsloop"(%1, %0, %2) ({
+  omp.wsloop {
+    "omp.loopnest"(%1, %0, %2) ({
     ^bb0(%arg1: i64):
      %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
      %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
      llvm.store %3, %4 : f32, !llvm.ptr
      omp.yield
   }) {inclusive, operandSegmentSizes = array} : (i64, i64, i64) -> ()
+    omp.terminator
+  }
   llvm.return
 }
@@ -379,13 +388,15 @@ llvm.func @body(i32)
 // CHECK-LABEL: @test_omp_wsloop_static_defchunk
 llvm.func @test_omp_wsloop_static_defchunk(%lb : i32, %ub : i32, %step : i32) -> () {
-  omp.wsloop schedule(static)
-  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(static) {
+    omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
     // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 34, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 0)
     // CHECK: call void @__kmpc_for_static_fini
     llvm.call @body(%iv) : (i32) -> ()
     omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -396,13 +407,15 @@ llvm.func @body(i32)
 // CHECK-LABEL: @test_omp_wsloop_static_1
 llvm.func @test_omp_wsloop_static_1(%lb : i32, %ub : i32, %step : i32) -> () {
   %static_chunk_size = llvm.mlir.constant(1 : i32) : i32
-  omp.wsloop schedule(static = %static_chunk_size : i32)
-  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(static = %static_chunk_size : i32) {
+    omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
    // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 33, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 1)
    // CHECK: call void @__kmpc_for_static_fini
    llvm.call @body(%iv) : (i32) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -413,13 +426,15 @@ llvm.func @body(i32)
 // CHECK-LABEL: @test_omp_wsloop_static_2
 llvm.func @test_omp_wsloop_static_2(%lb : i32, %ub : i32, %step : i32) -> () {
   %static_chunk_size = llvm.mlir.constant(2 : i32) : i32
-  omp.wsloop schedule(static = %static_chunk_size : i32)
-  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(static = %static_chunk_size : i32) {
+    omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
    // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 33, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 2)
    // CHECK: call void @__kmpc_for_static_fini
    llvm.call @body(%iv) : (i32) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -428,8 +443,8 @@ llvm.func @test_omp_wsloop_static_2(%lb : i32, %ub : i32, %step : i32) -> () {
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(dynamic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -437,6 +452,8 @@ llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () {
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -446,8 +463,8 @@ llvm.func @body(i64)
 llvm.func @test_omp_wsloop_dynamic_chunk_const(%lb : i64, %ub : i64, %step : i64) -> () {
   %chunk_size_const = llvm.mlir.constant(2 : i16) : i16
-  omp.wsloop schedule(dynamic = %chunk_size_const : i16)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic = %chunk_size_const : i16) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i64 {{.*}}, i64 %{{.*}}, i64 {{.*}}, i64 2)
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -455,6 +472,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_const(%lb : i64, %ub : i64, %step : i64
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -466,8 +485,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var(%lb : i32, %ub : i32, %step : i32)
   %1 = llvm.mlir.constant(1 : i64) : i64
   %chunk_size_alloca = llvm.alloca %1 x i16 {bindc_name = "chunk_size", in_type = i16, uniq_name = "_QFsub1Echunk_size"} : (i64) -> !llvm.ptr
   %chunk_size_var = llvm.load %chunk_size_alloca : !llvm.ptr -> i16
-  omp.wsloop schedule(dynamic = %chunk_size_var : i16)
-  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic = %chunk_size_var : i16) {
+    omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
   // CHECK: %[[CHUNK_SIZE:.*]] = sext i16 %{{.*}} to i32
   // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
@@ -475,6 +494,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var(%lb : i32, %ub : i32, %step : i32)
   // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
    llvm.call @body(%iv) : (i32) -> ()
    omp.yield
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -487,8 +508,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
   %1 = llvm.mlir.constant(1 : i64) : i64
   %chunk_size_alloca = llvm.alloca %1 x i64 {bindc_name = "chunk_size", in_type = i64, uniq_name = "_QFsub1Echunk_size"} : (i64) -> !llvm.ptr
   %chunk_size_var = llvm.load %chunk_size_alloca : !llvm.ptr -> i64
-  omp.wsloop schedule(dynamic = %chunk_size_var : i64)
-  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic = %chunk_size_var : i64) {
+    omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
   // CHECK: %[[CHUNK_SIZE:.*]] = trunc i64 %{{.*}} to i32
   // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
@@ -497,6 +518,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
    llvm.call @body(%iv) : (i32) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -505,8 +528,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
 llvm.func @body(i32)
 llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32, %chunk_size : i32) -> () {
-  omp.wsloop schedule(dynamic = %chunk_size : i32)
-  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic = %chunk_size : i32) {
+    omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %{{.*}})
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -514,6 +537,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32,
    llvm.call @body(%iv) : (i32) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -522,8 +547,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32,
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(auto)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(auto) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -531,6 +556,8 @@ llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -539,8 +566,8 @@ llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(runtime)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(runtime) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -548,6 +575,8 @@ llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -556,8 +585,8 @@ llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(guided)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(guided) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -565,6 +594,8 @@ llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -573,8 +604,8 @@ llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(dynamic, nonmonotonic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic, nonmonotonic) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -582,6 +613,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i6
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -590,8 +623,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i6
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(dynamic, monotonic)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic, monotonic) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 536870947
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -599,6 +632,8 @@ llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64)
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -607,8 +642,8 @@ llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64)
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(runtime, simd)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(runtime, simd) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741871
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -616,6 +651,8 @@ llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> ()
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -624,8 +661,8 @@ llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> ()
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(guided, simd)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(guided, simd) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741870
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
   // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -633,6 +670,8 @@ llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> ()
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -782,8 +821,8 @@ llvm.func @simdloop_if(%arg0: !llvm.ptr {fir.bindc_name = "n"}, %arg1: !llvm.ptr
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
   // CHECK: call void @__kmpc_dispatch_fini_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -792,6 +831,8 @@ llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -800,8 +841,8 @@ llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_static_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(static) ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(static) ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
   // CHECK: call void @__kmpc_dispatch_fini_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -810,6 +851,8 @@ llvm.func @test_omp_wsloop_static_ordered(%lb : i64, %ub : i64, %step : i64) ->
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -819,8 +862,8 @@ llvm.func @body(i32)
 llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i32) -> () {
   %static_chunk_size = llvm.mlir.constant(1 : i32) : i32
-  omp.wsloop schedule(static = %static_chunk_size : i32) ordered(0)
-  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(static = %static_chunk_size : i32) ordered(0) {
+    omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 65, i32 1, i32 %{{.*}}, i32 1, i32 1)
   // CHECK: call void @__kmpc_dispatch_fini_4u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
@@ -829,6 +872,8 @@ llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i3
    llvm.call @body(%iv) : (i32) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -837,8 +882,8 @@ llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i3
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(dynamic) ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic) ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 67, i64 1, i64 %{{.*}}, i64 1, i64 1)
   // CHECK: call void @__kmpc_dispatch_fini_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -847,6 +892,8 @@ llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) ->
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -855,8 +902,8 @@ llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) ->
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(auto) ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(auto) ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 70, i64 1, i64 %{{.*}}, i64 1, i64 1)
   // CHECK: call void @__kmpc_dispatch_fini_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -865,6 +912,8 @@ llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> ()
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -873,8 +922,8 @@ llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> ()
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(runtime) ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(runtime) ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 69, i64 1, i64 %{{.*}}, i64 1, i64 1)
   // CHECK: call void @__kmpc_dispatch_fini_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -883,6 +932,8 @@ llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) ->
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -891,8 +942,8 @@ llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) ->
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(guided) ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(guided) ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 68, i64 1, i64 %{{.*}}, i64 1, i64 1)
   // CHECK: call void @__kmpc_dispatch_fini_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -901,6 +952,8 @@ llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) ->
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -909,8 +962,8 @@ llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) ->
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(dynamic, nonmonotonic) ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic, nonmonotonic) ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
   // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741891, i64 1, i64 %{{.*}}, i64 1, i64 1)
   // CHECK: call void @__kmpc_dispatch_fini_8u
   // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -919,6 +972,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %s
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -927,8 +982,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %s
 llvm.func @body(i64)
 llvm.func @test_omp_wsloop_dynamic_monotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
-  omp.wsloop schedule(dynamic, monotonic) ordered(0)
-  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+  omp.wsloop schedule(dynamic, monotonic) ordered(0) {
+    omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 536870979, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -937,6 +992,8 @@ llvm.func @test_omp_wsloop_dynamic_monotonic_ordered(%lb : i64, %ub : i64, %step
    llvm.call @body(%iv) : (i64) -> ()
    omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -1103,8 +1160,8 @@ llvm.func @collapse_wsloop(
 // CHECK: %[[TOTAL_SUB_1:.*]] = sub i32 %[[TOTAL]], 1
 // CHECK: store i32 %[[TOTAL_SUB_1]], ptr
 // CHECK: call void @__kmpc_for_static_init_4u
-  omp.wsloop
-  for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
+  omp.wsloop {
+    omp.loopnest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
      %31 = llvm.load %20 : !llvm.ptr -> i32
      %32 = llvm.add %31, %arg0 : i32
      %33 = llvm.add %32, %arg1 : i32
@@ -1113,6 +1170,8 @@ llvm.func @collapse_wsloop(
      omp.yield
    }
    omp.terminator
+  }
+  omp.terminator
 }
   llvm.return
 }
@@ -1164,8 +1223,8 @@ llvm.func @collapse_wsloop_dynamic(
 // CHECK: store i32 1, ptr
 // CHECK: store i32 %[[TOTAL]], ptr
 // CHECK: call void @__kmpc_dispatch_init_4u
-  omp.wsloop schedule(dynamic)
-  for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
+  omp.wsloop schedule(dynamic) {
+    omp.loopnest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
      %31 = llvm.load %20 : !llvm.ptr -> i32
      %32 = llvm.add %31, %arg0 : i32
      %33 = llvm.add %32, %arg1 : i32
@@ -1174,6 +1233,8 @@ llvm.func @collapse_wsloop_dynamic(
      omp.yield
    }
    omp.terminator
+  }
+  omp.terminator
 }
   llvm.return
 }
@@ -1196,8 +1257,8 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
   // CHECK: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[OMP_THREAD]])
   }
-  omp.wsloop ordered(0)
-  for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
+  omp.wsloop ordered(0) {
+    omp.loopnest (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
     // CHECK: call void @__kmpc_ordered(ptr @[[GLOB3:[0-9]+]], i32 [[OMP_THREAD2:%.*]])
     omp.ordered_region {
       omp.terminator
@@ -1205,9 +1266,11 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
     }
     omp.yield
   }
+    omp.terminator
+  }
-  omp.wsloop ordered(1)
-  for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
+  omp.wsloop ordered(1) {
+    omp.loopnest (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
     // CHECK: [[TMP:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR]], i64 0, i64 0
     // CHECK: store i64 [[ARG0:%.*]], ptr [[TMP]], align 8
     // CHECK: [[TMP2:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR]], i64 0, i64 0
@@ -1224,9 +1287,11 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
     omp.yield
   }
+    omp.terminator
+  }
-  omp.wsloop ordered(2)
-  for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
+  omp.wsloop ordered(2) {
+    omp.loopnest (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
     // CHECK: [[TMP5:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR5]], i64 0, i64 0
     // CHECK: store i64 [[ARG0]], ptr [[TMP5]], align 8
     // CHECK: [[TMP6:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR5]], i64 0, i64 1
@@ -1254,6 +1319,8 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
     omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -2122,11 +2189,14 @@ llvm.func @omp_sections_with_clauses() -> () {
 // introduction mechanism itself is tested elsewhere.
 // CHECK-LABEL: @repeated_successor
 llvm.func @repeated_successor(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: i1) {
-  omp.wsloop for (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
+  omp.wsloop {
+    omp.loopnest (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
     llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64)
   ^bb1(%0: i64):  // 2 preds: ^bb0, ^bb0
     omp.yield
   }
+    omp.terminator
+  }
   llvm.return
 }
@@ -2549,8 +2619,8 @@ llvm.func @omp_opaque_pointers(%arg0 : !llvm.ptr, %arg1: !llvm.ptr, %expr: i32)
 // CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1
 // CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1
 // CHECK: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 1
-module attributes {omp.flags = #omp.flags} {}
 // -----
@@ -2595,8 +2665,8 @@ module attributes {omp.version = #omp.version} {}
 // CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
 // CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
 // CHECK: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
-module attributes {omp.flags = #omp.flags} {}
 // -----
diff --git a/mlir/test/Target/LLVMIR/openmp-nested.mlir b/mlir/test/Target/LLVMIR/openmp-nested.mlir
index e1fdfdd24a3cb..35bc63fb3b4a5 100644
--- a/mlir/test/Target/LLVMIR/openmp-nested.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-nested.mlir
@@ -11,13 +11,17 @@ module {
       %2 = llvm.mlir.constant(0 : index) : i64
       %4 = llvm.mlir.constant(0 : i32) : i32
       %12 = llvm.alloca %0 x i64 : (i64) -> !llvm.ptr
-      omp.wsloop for (%arg2) : i64 = (%2) to (%1) step (%0) {
+      omp.wsloop {
+        omp.loopnest (%arg2) : i64 = (%2) to (%1) step (%0) {
         omp.parallel {
-          omp.wsloop for (%arg3) : i64 = (%2) to (%0) step (%0) {
+          omp.wsloop {
+            omp.loopnest (%arg3) : i64 = (%2) to (%0) step (%0) {
             llvm.store %2, %12 : i64, !llvm.ptr
             omp.yield
           }
           omp.terminator
+          }
+          omp.terminator
         }
         %19 = llvm.load %12 : !llvm.ptr -> i64
         %20 = llvm.trunc %19 : i64 to i32
@@ -27,6 +31,8 @@ module {
         omp.yield
       }
       omp.terminator
+      }
+      omp.terminator
     }
     %a4 = llvm.mlir.constant(0 : i32) : i32
     llvm.return %a4 : i32
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
index 9543458e950be..deeaf8219ce95 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
@@ -26,8 +26,8 @@ llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
   %c1 = llvm.mlir.constant(1 : i32) : i32
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   omp.parallel {
-    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr)
-    for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
+      omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
       %1 = llvm.mlir.constant(2.0 : f32) : f32
       %2 = llvm.load %prv : !llvm.ptr -> f32
       %3 = llvm.fadd %1, %2 : f32
@@ -35,6 +35,8 @@ llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
       omp.yield
     }
     omp.terminator
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -105,8 +107,8 @@ llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   omp.parallel {
-    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr)
-    for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr) {
+      omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
       %1 = llvm.mlir.constant(2.0 : f32) : f32
       %3 = llvm.load %prv0 : !llvm.ptr -> f32
       %4 = llvm.fadd %3, %1 : f32
@@ -117,6 +119,8 @@ llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
       omp.yield
     }
     omp.terminator
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -195,8 +199,8 @@ llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   omp.parallel {
-    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr)
-    for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr) {
+      omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
       %1 = llvm.mlir.constant(2.0 : f32) : f32
       %3 = llvm.load %prv0 : !llvm.ptr -> f32
       %4 = llvm.fadd %3, %1 : f32
@@ -204,6 +208,8 @@ llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
       omp.yield
     }
     omp.terminator
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -280,8 +286,8 @@ llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
   %c1 = llvm.mlir.constant(1 : i32) : i32
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   omp.parallel {
-    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr)
-    for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
+      omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
       %1 = llvm.mlir.constant(2.0 : f32) : f32
       %2 = llvm.load %prv : !llvm.ptr -> f32
      %3 = llvm.fadd %2, %1 : f32
@@ -292,6 +298,8 @@ llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
       omp.yield
     }
     omp.terminator
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -374,8 +382,8 @@ llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
   omp.parallel {
-    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @mul_f32 %2 -> %prv1 : !llvm.ptr)
-    for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @mul_f32 %2 -> %prv1 : !llvm.ptr) {
+      omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
       %1 = llvm.mlir.constant(2.0 : f32) : f32
       %3 = llvm.load %prv0 : !llvm.ptr -> f32
       %4 = llvm.fadd %3, %1 : f32
@@ -386,6 +394,8 @@ llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
       omp.yield
     }
     omp.terminator
+    }
+    omp.terminator
   }
   llvm.return
 }
@@ -529,9 +539,10 @@ llvm.func @parallel_nested_workshare_reduction(%ub : i64) {
   %lb = llvm.mlir.constant(1 : i64) : i64
   %step = llvm.mlir.constant(1 : i64) : i64
-
+
   omp.parallel reduction(@add_i32 %0 -> %prv : !llvm.ptr) {
-    omp.wsloop for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    omp.wsloop {
+      omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
       %ival = llvm.trunc %iv : i64 to i32
       %lprv = llvm.load %prv : !llvm.ptr -> i32
       %add = llvm.add %lprv, %ival : i32
@@ -539,6 +550,8 @@ llvm.func @parallel_nested_workshare_reduction(%ub : i64) {
       omp.yield
     }
    omp.terminator
+    }
+    omp.terminator
  }
  llvm.return
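All of the test hunks above encode the same mechanical rewrite: the loop bounds move off `omp.wsloop` onto a nested loop operation, and each region gains its own terminator. A minimal before/after sketch, assuming the operation names used in this patch (`omp.wsloop`, `omp.loopnest`) and placeholder values `%lb`, `%ub`, `%step`, and `@body` taken from the tests above:

// Old form: the induction variable and bounds are attached directly to omp.wsloop.
omp.wsloop for (%iv) : i32 = (%lb) to (%ub) step (%step) {
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
}

// New form: omp.wsloop is a wrapper region; the nested loop op carries the
// bounds and yields, and the wrapper region is closed with omp.terminator.
omp.wsloop {
  omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
    llvm.call @body(%iv) : (i32) -> ()
    omp.yield
  }
  omp.terminator
}

Clause operands such as schedule, ordered, and reduction stay on the `omp.wsloop` wrapper in the new form, which is why the hunks only ever touch the loop header and the closing braces of each test.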