Fix remaining issues after merge.

skatrak · jsjodin · skatrak · commit 25d3695bb888 · 2024-03-21T12:15:17.000Z
- Cherry-pick some unit test fixes from PR #44. - Cherry-pick reduction processing fix from PR llvm#85807. - Update argument lists of modified functions. - Rewrite `ReductionProcessor::addReductionSym` based on new clause structures. - Remove leftover references to `RIManager`. - Op renames to match new approach. Co-authored-by: Jan Leyonberg <jan_sjodin@yahoo.com>
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -907,27 +907,37 @@ bool ClauseProcessor::processMap(
 bool ClauseProcessor::processTargetReduction(
     llvm::SmallVector<const Fortran::semantics::Symbol *> &reductionSymbols)
     const {
-  return findRepeatableClause<ClauseTy::Reduction>(
-      [&](const ClauseTy::Reduction *reductionClause,
+  return findRepeatableClause<omp::clause::Reduction>(
+      [&](const omp::clause::Reduction &clause,
           const Fortran::parser::CharBlock &) {
         ReductionProcessor rp;
-        rp.addReductionSym(reductionClause->v, reductionSymbols);
+        rp.addReductionSym(clause, reductionSymbols);
       });
 }
 
 bool ClauseProcessor::processReduction(
     mlir::Location currentLocation,
-    llvm::SmallVectorImpl<mlir::Value> &reductionVars,
-    llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
-    llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> *reductionSymbols)
-    const {
+    llvm::SmallVectorImpl<mlir::Value> &outReductionVars,
+    llvm::SmallVectorImpl<mlir::Attribute> &outReductionDeclSymbols,
+    llvm::SmallVectorImpl<const Fortran::semantics::Symbol *>
+        *outReductionSymbols) const {
   return findRepeatableClause<omp::clause::Reduction>(
       [&](const omp::clause::Reduction &clause,
           const Fortran::parser::CharBlock &) {
+        llvm::SmallVector<mlir::Value> reductionVars;
+        llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
+        llvm::SmallVector<const Fortran::semantics::Symbol *> reductionSymbols;
         ReductionProcessor rp;
         rp.addDeclareReduction(currentLocation, converter, clause,
                                reductionVars, reductionDeclSymbols,
-                               reductionSymbols);
+                               outReductionSymbols ? &reductionSymbols
+                                                   : nullptr);
+        llvm::copy(reductionVars, std::back_inserter(outReductionVars));
+        llvm::copy(reductionDeclSymbols,
+                   std::back_inserter(outReductionDeclSymbols));
+        if (outReductionSymbols)
+          llvm::copy(reductionSymbols,
+                     std::back_inserter(*outReductionSymbols));
       });
 }
 
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1339,7 +1339,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
                  Fortran::parser::OmpClause::Defaultmap>(
       currentLocation, llvm::omp::Directive::OMPD_target);
 
-  DataSharingProcessor localDSP(converter, clauseList, eval);
+  DataSharingProcessor localDSP(converter, semaCtx, clauseList, eval);
   DataSharingProcessor &actualDSP = dsp ? *dsp : localDSP;
   actualDSP.processStep1();
 
@@ -1458,8 +1458,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
       /*teams_thread_limit=*/nullptr, /*num_threads=*/nullptr);
 
   genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSymTypes,
-                    mapSymLocs, mapSymbols, currentLocation, clauseList,
-                    actualDSP);
+                    mapSymLocs, mapSymbols, currentLocation, actualDSP);
 
   return targetOp;
 }
@@ -2074,7 +2073,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
   }();
 
   bool validDirective = false;
-  DataSharingProcessor dsp(converter, loopOpClauseList, eval);
+  DataSharingProcessor dsp(converter, semaCtx, loopOpClauseList, eval);
 
   if (llvm::omp::topTaskloopSet.test(ompDirective)) {
     validDirective = true;
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -67,17 +67,11 @@ ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType(
 }
 
 void ReductionProcessor::addReductionSym(
-    const Fortran::parser::OmpReductionClause &reduction,
+    const omp::clause::Reduction &reduction,
     llvm::SmallVector<const Fortran::semantics::Symbol *> &symbols) {
-  const auto &objectList{std::get<Fortran::parser::OmpObjectList>(reduction.t)};
-
-  for (const Fortran::parser::OmpObject &ompObject : objectList.v) {
-    if (const auto *name{
-            Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
-      if (const Fortran::semantics::Symbol * symbol{name->symbol})
-        symbols.push_back(symbol);
-    }
-  }
+  const auto &objectList{std::get<omp::ObjectList>(reduction.t)};
+  llvm::transform(objectList, std::back_inserter(symbols),
+                  [](const Object &object) { return object.id(); });
 }
 
 bool ReductionProcessor::supportedIntrinsicProcReduction(
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/lib/Lower/OpenMP/ReductionProcessor.h
@@ -16,7 +16,6 @@
 #include "Clauses.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
-#include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/symbol.h"
 #include "flang/Semantics/type.h"
 #include "mlir/IR/Location.h"
@@ -106,7 +105,7 @@ class ReductionProcessor {
                                           mlir::Value op2);
 
   static void addReductionSym(
-      const Fortran::parser::OmpReductionClause &reduction,
+      const omp::clause::Reduction &reduction,
       llvm::SmallVector<const Fortran::semantics::Symbol *> &symbols);
 
   /// Creates an OpenMP reduction declaration and inserts it into the provided
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1125,22 +1125,19 @@ convertOmpWsloop(
   tempTerminator->eraseFromParent();
   builder.restoreIP(nextInsertionPoint);
 
-  if (!ompBuilder->Config.isGPU())
-    ompBuilder->RIManager.clear();
-
   return success();
 }
 
 static LogicalResult
-convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
+convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation) {
   llvm::OpenMPIRBuilder::InsertPointTy redAllocaIP =
     findAllocaInsertPoint(builder, moduleTranslation);
   SmallVector<OwningReductionGen> owningReductionGens;
   SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
   SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
 
-  return convertOmpWsLoop(opInst, builder, moduleTranslation, redAllocaIP,
+  return convertOmpWsloop(opInst, builder, moduleTranslation, redAllocaIP,
                           owningReductionGens, owningAtomicReductionGens,
                           reductionInfos);
 }
@@ -1414,9 +1411,6 @@ convertOmpParallel(Operation &opInst1, llvm::IRBuilderBase &builder,
       ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
                                  ifCond, numThreads, pbKind, isCancellable));
 
-  if (!ompBuilder->Config.isGPU())
-    ompBuilder->RIManager.clear();
-
   return bodyGenStatus;
 }
 
@@ -3330,10 +3324,9 @@ class ConversionDispatchList {
   }
 };
 
-static LogicalResult convertOmpDistributeParallelWsLoop(
-                                                        Operation *op,
-    omp::DistributeOp distribute, omp::ParallelOp parallel,
-    omp::WsLoopOp wsloop, llvm::IRBuilderBase &builder,
+static LogicalResult convertOmpDistributeParallelWsloop(
+    Operation *op, omp::DistributeOp distribute, omp::ParallelOp parallel,
+    omp::WsloopOp wsloop, llvm::IRBuilderBase &builder,
     LLVM::ModuleTranslation &moduleTranslation,
     ConversionDispatchList &dispatchList);
 
@@ -3534,10 +3527,10 @@ convertInternalTargetOp(Operation *op, llvm::IRBuilderBase &builder,
 
   omp::DistributeOp distribute;
   omp::ParallelOp parallel;
-  omp::WsLoopOp wsloop;
+  omp::WsloopOp wsloop;
   // Match composite constructs
   if (matchOpNest(op, distribute, parallel, wsloop)) {
-    return convertOmpDistributeParallelWsLoop(op, distribute, parallel, wsloop,
+    return convertOmpDistributeParallelWsloop(op, distribute, parallel, wsloop,
                                               builder, moduleTranslation,
                                               dispatchList);
   }
@@ -3586,9 +3579,9 @@ class OpenMPDialectLLVMIRTranslationInterface
 // Implementation converting a nest of operations in a single function. This
 // just overrides the parallel and wsloop dispatches but does the normal
 // lowering for now.
-static LogicalResult convertOmpDistributeParallelWsLoop(
+static LogicalResult convertOmpDistributeParallelWsloop(
     Operation *op, omp::DistributeOp distribute, omp::ParallelOp parallel,
-    omp::WsLoopOp wsloop, llvm::IRBuilderBase &builder,
+    omp::WsloopOp wsloop, llvm::IRBuilderBase &builder,
     LLVM::ModuleTranslation &moduleTranslation,
     ConversionDispatchList &dispatchList) {
 
@@ -3599,25 +3592,22 @@ static LogicalResult convertOmpDistributeParallelWsLoop(
   llvm::OpenMPIRBuilder::InsertPointTy redAllocaIP;
 
   // Convert wsloop alternative implementation
-  ConvertFunctionTy convertWsLoop = [&redAllocaIP, &owningReductionGens,
-                                     &owningAtomicReductionGens,
-                                     &reductionInfos](
-                                        Operation *op,
-                                        llvm::IRBuilderBase &builder,
-                                        LLVM::ModuleTranslation
-                                            &moduleTranslation) {
-    if (!isa<omp::WsLoopOp>(op)) {
-      return std::make_pair(false, failure());
-    }
+  ConvertFunctionTy convertWsloop =
+      [&redAllocaIP, &owningReductionGens, &owningAtomicReductionGens,
+       &reductionInfos](Operation *op, llvm::IRBuilderBase &builder,
+                        LLVM::ModuleTranslation &moduleTranslation) {
+        if (!isa<omp::WsloopOp>(op)) {
+          return std::make_pair(false, failure());
+        }
 
-    LogicalResult result = convertOmpWsLoop(
-        *op, builder, moduleTranslation, redAllocaIP, owningReductionGens,
-        owningAtomicReductionGens, reductionInfos);
-    return std::make_pair(true, result);
-  };
+        LogicalResult result = convertOmpWsloop(
+            *op, builder, moduleTranslation, redAllocaIP, owningReductionGens,
+            owningAtomicReductionGens, reductionInfos);
+        return std::make_pair(true, result);
+      };
 
   // Push the new alternative functions
-  dispatchList.pushConversionFunction(convertWsLoop);
+  dispatchList.pushConversionFunction(convertWsloop);
 
   // Lower the current distribute operation
   LogicalResult result = convertOmpDistribute(*op, builder, moduleTranslation,
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
@@ -6,7 +6,8 @@
 module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
   llvm.func @target_parallel_wsloop(%arg0: !llvm.ptr) attributes {
     target_cpu = "gfx90a",
-    target_features = #llvm.target_features<["+gfx9-insts", "+wavefrontsize64"]>
+    target_features = #llvm.target_features<["+gfx9-insts", "+wavefrontsize64"]>,
+    omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>
   } {
     omp.parallel {
       %loop_ub = llvm.mlir.constant(9 : i32) : i32
diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir
@@ -5,7 +5,9 @@
 
 module attributes {omp.is_target_device = true} {
   llvm.func @foo(i32)
-  llvm.func @omp_target_teams_shared_simple(%arg0 : i32)  {
+  llvm.func @omp_target_teams_shared_simple(%arg0 : i32) attributes {
+    omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>
+  } {
     omp.teams {
       llvm.call @foo(%arg0) : (i32) -> ()
       omp.terminator
diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
@@ -4,7 +4,9 @@
 // for nested omp do loop with collapse clause inside omp target region
 
 module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
-  llvm.func @target_collapsed_wsloop(%arg0: !llvm.ptr) {
+  llvm.func @target_collapsed_wsloop(%arg0: !llvm.ptr) attributes {
+    omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>
+  } {
     %loop_ub = llvm.mlir.constant(99 : i32) : i32
     %loop_lb = llvm.mlir.constant(0 : i32) : i32
     %loop_step = llvm.mlir.constant(1 : index) : i32
diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
@@ -4,7 +4,9 @@
 // for nested omp do loop inside omp target region
 
 module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
-  llvm.func @target_wsloop(%arg0: !llvm.ptr ){
+  llvm.func @target_wsloop(%arg0: !llvm.ptr) attributes {
+    omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>
+  } {
       %loop_ub = llvm.mlir.constant(9 : i32) : i32
       %loop_lb = llvm.mlir.constant(0 : i32) : i32
       %loop_step = llvm.mlir.constant(1 : i32) : i32
@@ -16,7 +18,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
     llvm.return
   }
 
-  llvm.func @target_empty_wsloop(){
+  llvm.func @target_empty_wsloop() attributes {
+    omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>
+  } {
       %loop_ub = llvm.mlir.constant(9 : i32) : i32
       %loop_lb = llvm.mlir.constant(0 : i32) : i32
       %loop_step = llvm.mlir.constant(1 : i32) : i32