diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index cd4b25a17722c..c71fd598d5c8a 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2144,11 +2144,10 @@ static void genStandaloneSimd(lower::AbstractConverter &converter, genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps, simdReductionSyms); - // TODO: Support delayed privatization. DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/false, symTable); - dsp.processStep1(); + enableDelayedPrivatization, symTable); + dsp.processStep1(&simdClauseOps); mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector iv; @@ -2156,7 +2155,8 @@ static void genStandaloneSimd(lower::AbstractConverter &converter, loopNestClauseOps, iv); EntryBlockArgs simdArgs; - // TODO: Add private syms and vars. + simdArgs.priv.syms = dsp.getDelayedPrivSymbols(); + simdArgs.priv.vars = simdClauseOps.privateVars; simdArgs.reduction.syms = simdReductionSyms; simdArgs.reduction.vars = simdClauseOps.reductionVars; auto simdOp = diff --git a/flang/test/Lower/OpenMP/order-clause.f90 b/flang/test/Lower/OpenMP/order-clause.f90 index 717d9740c56f8..a30d82979021d 100644 --- a/flang/test/Lower/OpenMP/order-clause.f90 +++ b/flang/test/Lower/OpenMP/order-clause.f90 @@ -4,15 +4,15 @@ !CHECK-LABEL: func.func @_QPsimd_order() { subroutine simd_order - !CHECK: omp.simd order(reproducible:concurrent) { + !CHECK: omp.simd order(reproducible:concurrent) private({{.*}}) { !$omp simd order(concurrent) do i = 1, 10 end do - !CHECK: omp.simd order(reproducible:concurrent) { + !CHECK: omp.simd order(reproducible:concurrent) private({{.*}}) { !$omp simd order(reproducible:concurrent) do i = 1, 10 end do - !CHECK: omp.simd order(unconstrained:concurrent) { + !CHECK: omp.simd order(unconstrained:concurrent) private({{.*}}) { !$omp simd order(unconstrained:concurrent) do i = 1, 10 end do diff --git a/flang/test/Lower/OpenMP/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/parallel-private-clause.f90 index 8dc843fc9d96a..7114314df05d3 100644 --- a/flang/test/Lower/OpenMP/parallel-private-clause.f90 +++ b/flang/test/Lower/OpenMP/parallel-private-clause.f90 @@ -5,6 +5,23 @@ ! RUN: bbc --use-desc-for-alloc=false -fopenmp -emit-hlfir %s -o - \ ! RUN: | FileCheck %s --check-prefix=FIRDialect +! FIRDialect: omp.private {type = private} @_QFsimd_loop_1Er_private_ref_box_heap_f32 {{.*}} alloc { +! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} +! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> +! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> +! FIRDialect: [[R_DECL:%.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs, uniq_name = "{{.*}}r"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +! FIRDialect: omp.yield([[R_DECL]]#0 : !fir.ref>>) +! FIRDialect: } dealloc { +! FIRDialect: ^bb0([[R_DECL:%.*]]: !fir.ref>>): +! FIRDialect: {{%.*}} = fir.load [[R_DECL]] : !fir.ref>> +! FIRDialect: fir.if {{%.*}} { +! FIRDialect: [[LD:%.*]] = fir.load [[R_DECL]] : !fir.ref>> +! FIRDialect: [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box>) -> !fir.heap +! FIRDialect: fir.freemem [[AD]] : !fir.heap +! FIRDialect: fir.store {{%.*}} to [[R_DECL]] : !fir.ref>> +! FIRDialect: omp.yield +! FIRDialect: } + !FIRDialect: omp.private {type = private} @[[DERIVED_PRIVATIZER:_QFprivate_clause_derived_typeEt_private_ref_rec__QFprivate_clause_derived_typeTmy_type]] : !fir.ref}>> alloc { !FIRDialect: ^bb0(%{{.*}}: !fir.ref}>>): !FIRDialect: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.type<_QFprivate_clause_derived_typeTmy_type{t_i:i32,t_arr:!fir.array<5xi32>}> {bindc_name = "t", pinned, uniq_name = "_QFprivate_clause_derived_typeEt"} @@ -246,7 +263,6 @@ subroutine parallel_pointer() !$omp end parallel end subroutine parallel_pointer - !FIRDialect-LABEL: func @_QPsimple_loop_1() subroutine simple_loop_1 integer :: i @@ -354,20 +370,17 @@ subroutine simple_loop_3 ! FIRDialect: omp.terminator end subroutine + !CHECK-LABEL: func @_QPsimd_loop_1() subroutine simd_loop_1 integer :: i real, allocatable :: r; - ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} - ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> - ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> - ! FIRDialect: [[R_DECL:%.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs, uniq_name = "{{.*}}r"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) ! FIRDialect: %[[LB:.*]] = arith.constant 1 : i32 ! FIRDialect: %[[UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.simd { + ! FIRDialect: omp.simd private({{.*}}) { ! FIRDialect-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { !$OMP SIMD PRIVATE(r) do i=1, 9 @@ -378,10 +391,5 @@ subroutine simd_loop_1 end do !$OMP END SIMD ! FIRDialect: omp.yield - ! FIRDialect: {{%.*}} = fir.load [[R_DECL]]#0 : !fir.ref>> - ! FIRDialect: fir.if {{%.*}} { - ! FIRDialect: [[LD:%.*]] = fir.load [[R_DECL]]#0 : !fir.ref>> - ! FIRDialect: [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box>) -> !fir.heap - ! FIRDialect: fir.freemem [[AD]] : !fir.heap - ! FIRDialect: fir.store {{%.*}} to [[R_DECL]]#0 : !fir.ref>> + end subroutine diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90 index d92f06cebfdbe..0345ace24aaa0 100644 --- a/flang/test/Lower/OpenMP/simd.f90 +++ b/flang/test/Lower/OpenMP/simd.f90 @@ -13,7 +13,7 @@ subroutine simd ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: %[[UB:.*]] = arith.constant 9 : i32 ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK-NEXT: omp.simd { + ! CHECK-NEXT: omp.simd private({{.*}}) { ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i=1, 9 ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref @@ -33,7 +33,7 @@ subroutine simd_with_if_clause(n, threshold) ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0 ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.simd if(%[[COND:.*]]) { + ! CHECK: omp.simd if(%[[COND:.*]]) private({{.*}}) { ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref @@ -52,7 +52,7 @@ subroutine simd_with_simdlen_clause(n, threshold) ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0 ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.simd simdlen(2) { + ! CHECK: omp.simd simdlen(2) private({{.*}}) { ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref @@ -72,7 +72,7 @@ subroutine simd_with_simdlen_clause_from_param(n, threshold) ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0 ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.simd simdlen(2) { + ! CHECK: omp.simd simdlen(2) private({{.*}}) { ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref @@ -92,7 +92,7 @@ subroutine simd_with_simdlen_clause_from_expr_from_param(n, threshold) ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0 ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.simd simdlen(6) { + ! CHECK: omp.simd simdlen(6) private({{.*}}) { ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref @@ -111,7 +111,7 @@ subroutine simd_with_safelen_clause(n, threshold) ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0 ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.simd safelen(2) { + ! CHECK: omp.simd safelen(2) private({{.*}}) { ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref @@ -131,7 +131,7 @@ subroutine simd_with_safelen_clause_from_expr_from_param(n, threshold) ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0 ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.simd safelen(6) { + ! CHECK: omp.simd safelen(6) private({{.*}}) { ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref @@ -150,7 +150,7 @@ subroutine simd_with_simdlen_safelen_clause(n, threshold) ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0 ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.simd safelen(2) simdlen(1) { + ! CHECK: omp.simd safelen(2) simdlen(1) private({{.*}}) { ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref @@ -171,7 +171,7 @@ subroutine simd_with_collapse_clause(n) ! CHECK: %[[LOWER_J:.*]] = arith.constant 1 : i32 ! CHECK: %[[UPPER_J:.*]] = fir.load %[[PARAM_ARG:.*]] : !fir.ref ! CHECK: %[[STEP_J:.*]] = arith.constant 1 : i32 - ! CHECK: omp.simd { + ! CHECK: omp.simd private({{.*}}) { ! CHECK-NEXT: omp.loop_nest (%[[ARG_0:.*]], %[[ARG_1:.*]]) : i32 = ( ! CHECK-SAME: %[[LOWER_I]], %[[LOWER_J]]) to ( ! CHECK-SAME: %[[UPPER_I]], %[[UPPER_J]]) inclusive step ( @@ -235,7 +235,7 @@ subroutine simd_with_nontemporal_clause(n) !CHECK: %[[LB:.*]] = arith.constant 1 : i32 !CHECK: %[[UB:.*]] = fir.load %{{.*}}#0 : !fir.ref !CHECK: %[[STEP:.*]] = arith.constant 1 : i32 - !CHECK: omp.simd nontemporal(%[[A_DECL]]#1, %[[C_DECL]]#1 : !fir.ref, !fir.ref) { + !CHECK: omp.simd nontemporal(%[[A_DECL]]#1, %[[C_DECL]]#1 : !fir.ref, !fir.ref) private({{.*}}) { !CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { !$OMP SIMD NONTEMPORAL(A, C) do i = 1, n @@ -249,16 +249,14 @@ subroutine lastprivate_with_simd !CHECK: %[[VAR_SUM:.*]] = fir.alloca f32 {bindc_name = "sum", uniq_name = "_QFlastprivate_with_simdEsum"} !CHECK: %[[VAR_SUM_DECLARE:.*]]:2 = hlfir.declare %[[VAR_SUM]] {{.*}} -!CHECK: %[[VAR_SUM_PINNED:.*]] = fir.alloca f32 {bindc_name = "sum", pinned, uniq_name = "_QFlastprivate_with_simdEsum"} -!CHECK: %[[VAR_SUM_PINNED_DECLARE:.*]]:2 = hlfir.declare %[[VAR_SUM_PINNED]] {{.*}} - implicit none integer :: i real :: sum -!CHECK: omp.simd { +!CHECK: omp.simd private(@_QFlastprivate_with_simdEsum_private_ref_f32 %[[VAR_SUM_DECLARE]]#0 -> %[[VAR_SUM_PINNED:.*]], @{{.*}}) { !CHECK: omp.loop_nest (%[[ARG:.*]]) : i32 = ({{.*}} to ({{.*}}) inclusive step ({{.*}}) { +!CHECK: %[[VAR_SUM_PINNED_DECLARE:.*]]:2 = hlfir.declare %[[VAR_SUM_PINNED]] {{.*}} !CHECK: %[[ADD_RESULT:.*]] = arith.addi {{.*}} !CHECK: %[[ADD_RESULT_CONVERT:.*]] = fir.convert %[[ADD_RESULT]] : (i32) -> f32 !CHECK: hlfir.assign %[[ADD_RESULT_CONVERT]] to %[[VAR_SUM_PINNED_DECLARE]]#0 : f32, !fir.ref @@ -283,7 +281,7 @@ subroutine simd_with_reduction_clause ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: %[[UB:.*]] = arith.constant 9 : i32 ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK-NEXT: omp.simd reduction(@[[REDUCER]] %[[X:.*]]#0 -> %[[X_RED:.*]] : !fir.ref) { + ! CHECK-NEXT: omp.simd private({{.*}}) reduction(@[[REDUCER]] %[[X:.*]]#0 -> %[[X_RED:.*]] : !fir.ref) { ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { !$omp simd reduction(+:x) do i=1, 9 diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 1edf47dff8c4a..2b57a8dce3de5 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -5203,12 +5203,7 @@ void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop, Function *F = CanonicalLoop->getFunction(); // Define where if branch should be inserted - Instruction *SplitBefore; - if (Instruction::classof(IfCond)) { - SplitBefore = dyn_cast(IfCond); - } else { - SplitBefore = CanonicalLoop->getPreheader()->getTerminator(); - } + Instruction *SplitBefore = CanonicalLoop->getPreheader()->getTerminator(); // TODO: We should not rely on pass manager. Currently we use pass manager // only for getting llvm::Loop which corresponds to given CanonicalLoopInfo diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 3d62b3218869e..ca7e08e9f18b5 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -2116,7 +2116,7 @@ void SimdOp::build(OpBuilder &builder, OperationState &state, makeArrayAttr(ctx, clauses.alignments), clauses.ifExpr, /*linear_vars=*/{}, /*linear_step_vars=*/{}, clauses.nontemporalVars, clauses.order, clauses.orderMod, - /*private_vars=*/{}, /*private_syms=*/nullptr, + clauses.privateVars, makeArrayAttr(ctx, clauses.privateSyms), clauses.reductionVars, makeDenseBoolArrayAttr(ctx, clauses.reductionByref), makeArrayAttr(ctx, clauses.reductionSyms), clauses.safelen, diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index a364098e0bd8a..d6112fa9af118 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -273,7 +273,6 @@ static LogicalResult checkImplementationStatus(Operation &op) { .Case([&](omp::SimdOp op) { checkLinear(op, result); checkNontemporal(op, result); - checkPrivate(op, result); checkReduction(op, result); }) .Case privateBlockArgs = + cast(*simdOp).getPrivateBlockArgs(); + SmallVector mlirPrivateVars; + SmallVector llvmPrivateVars; + SmallVector privateDecls; + mlirPrivateVars.reserve(privateBlockArgs.size()); + llvmPrivateVars.reserve(privateBlockArgs.size()); + collectPrivatizationDecls(simdOp, privateDecls); + + for (mlir::Value privateVar : simdOp.getPrivateVars()) + mlirPrivateVars.push_back(privateVar); + + llvm::OpenMPIRBuilder::InsertPointTy allocaIP = + findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + llvm::Expected afterAllocas = allocatePrivateVars( + builder, moduleTranslation, privateBlockArgs, privateDecls, + mlirPrivateVars, llvmPrivateVars, allocaIP); + if (handleError(afterAllocas, opInst).failed()) + return failure(); + // Generator of the canonical loop body. SmallVector loopInfos; SmallVector bodyInsertPoints; @@ -2331,7 +2350,9 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, order, simdlen, safelen); builder.restoreIP(afterIP); - return success(); + + return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(), + llvmPrivateVars, privateDecls); } /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. diff --git a/mlir/test/Target/LLVMIR/openmp-simd-private.mlir b/mlir/test/Target/LLVMIR/openmp-simd-private.mlir new file mode 100644 index 0000000000000..09d76f8edd007 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-simd-private.mlir @@ -0,0 +1,117 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s + +omp.private {type = private} @i_privatizer : !llvm.ptr alloc { +^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i", pinned} : (i64) -> !llvm.ptr + omp.yield(%1 : !llvm.ptr) +} + +// CHECK-LABEL: test_loop_var_privatization() +// Original (non-privatized) allocation for `i`. +// CHECK: %{{.*}} = alloca i32, i64 1, align 4 +// CHECK: %[[DUMMY:.*]] = alloca float, i64 1, align 4 +// CHECK: %[[PRIV_I:.*]] = alloca i32, i64 1, align 4 +// CHECK: br label %[[AFTER_ALLOC:.*]] + +// CHECK: [[AFTER_ALLOC]]: +// CHECK: br label %[[ENTRY:.*]] + +// CHECK: [[ENTRY]]: +// CHECK: br label %[[OMP_LOOP_PREHEADER:.*]] + +// CHECK: [[OMP_LOOP_PREHEADER]]: +// CHECK: br label %[[OMP_LOOP_HEADER:.*]] + +// CHECK: [[OMP_LOOP_HEADER]]: +// CHECK: %[[OMP_LOOP_IV:.*]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.*]], %[[OMP_LOOP_INC:.*]] ] +// CHECK: br label %[[OMP_LOOP_COND:.*]] + +// CHECK: [[OMP_LOOP_COND]]: +// CHECK: %[[OMP_LOOP_CMP:.*]] = icmp ult i32 %[[OMP_LOOP_IV]], 10 +// CHECK: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.*]], label %[[OMP_LOOP_EXIT:.*]] + +// CHECK: [[OMP_LOOP_BODY]]: +// CHECK: %[[IV_UPDATE:.*]] = mul i32 %[[OMP_LOOP_IV]], 1 +// CHECK: %[[IV_UPDATE_2:.*]] = add i32 %[[IV_UPDATE]], 1 +// CHECK: br label %[[OMP_SIMD_REGION:.*]] + +// CHECK: [[OMP_SIMD_REGION]]: +// CHECK: store i32 %[[IV_UPDATE_2]], ptr %[[PRIV_I]], align 4 +// CHECK: %[[DUMMY_VAL:.*]] = load float, ptr %[[DUMMY]], align 4 +// CHECK: %[[PRIV_I_VAL:.*]] = load i32, ptr %[[PRIV_I]], align 4 +// CHECK: %[[PRIV_I_VAL_FLT:.*]] = sitofp i32 %[[PRIV_I_VAL]] to float +// CHECK: %[[DUMMY_VAL_UPDATE:.*]] = fadd {{.*}} float %[[DUMMY_VAL]], %[[PRIV_I_VAL_FLT]] +// CHECK: store float %[[DUMMY_VAL_UPDATE]], ptr %[[DUMMY]], align 4, !llvm.access.group !1 +// CHECK: br label %[[OMP_REGION_CONT:.*]] + +// CHECK: [[OMP_REGION_CONT]]: +// CHECK: br label %[[OMP_LOOP_INC:.*]] + +// CHECK: [[OMP_LOOP_INC]]: +// CHECK: %[[OMP_LOOP_NEXT:.*]] = add nuw i32 %[[OMP_LOOP_IV]], 1 +// CHECK: br label %[[OMP_LOOP_HEADER]] + +// CHECK: [[OMP_LOOP_EXIT]]: + + +llvm.func @test_loop_var_privatization() attributes {fir.internal_name = "_QPtest_private_clause"} { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr + %3 = llvm.alloca %0 x f32 {bindc_name = "dummy"} : (i64) -> !llvm.ptr + %4 = llvm.mlir.constant(10 : i32) : i32 + %5 = llvm.mlir.constant(1 : i32) : i32 + omp.simd private(@i_privatizer %1 -> %arg0 : !llvm.ptr) { + omp.loop_nest (%arg1) : i32 = (%5) to (%4) inclusive step (%5) { + llvm.store %arg1, %arg0 : i32, !llvm.ptr + %8 = llvm.load %3 : !llvm.ptr -> f32 + %9 = llvm.load %arg0 : !llvm.ptr -> i32 + %10 = llvm.sitofp %9 : i32 to f32 + %11 = llvm.fadd %8, %10 {fastmathFlags = #llvm.fastmath} : f32 + llvm.store %11, %3 : f32, !llvm.ptr + omp.yield + } + } + llvm.return +} + +omp.private {type = private} @dummy_privatizer : !llvm.ptr alloc { +^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x f32 {bindc_name = "dummy", pinned} : (i64) -> !llvm.ptr + omp.yield(%1 : !llvm.ptr) +} + +// CHECK-LABEL: test_private_clause() +// Original (non-privatized) allocation for `i`. +// CHECK: %{{.*}} = alloca i32, i64 1, align 4 +// Original (non-privatized) allocation for `dummy`. +// CHECK: %{{.*}} = alloca float, i64 1, align 4 +// CHECK: %[[PRIV_DUMMY:.*]] = alloca float, i64 1, align 4 +// CHECK: %[[PRIV_I:.*]] = alloca i32, i64 1, align 4 + +// CHECK: omp.simd.region: +// CHECK-NOT: br label +// CHECK: store i32 %{{.*}}, ptr %[[PRIV_I]], align 4 +// CHECK: %{{.*}} = load float, ptr %[[PRIV_DUMMY]], align 4 +// CHECK: store float %{{.*}}, ptr %[[PRIV_DUMMY]], align 4 + +llvm.func @test_private_clause() attributes {fir.internal_name = "_QPtest_private_clause"} { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr + %3 = llvm.alloca %0 x f32 {bindc_name = "dummy"} : (i64) -> !llvm.ptr + %4 = llvm.mlir.constant(10 : i32) : i32 + %5 = llvm.mlir.constant(1 : i32) : i32 + omp.simd private(@dummy_privatizer %3 -> %arg0, @i_privatizer %1 -> %arg1 : !llvm.ptr, !llvm.ptr) { + omp.loop_nest (%arg2) : i32 = (%5) to (%4) inclusive step (%5) { + llvm.store %arg2, %arg1 : i32, !llvm.ptr + %8 = llvm.load %arg0 : !llvm.ptr -> f32 + %9 = llvm.load %arg1 : !llvm.ptr -> i32 + %10 = llvm.sitofp %9 : i32 to f32 + %11 = llvm.fadd %8, %10 {fastmathFlags = #llvm.fastmath} : f32 + llvm.store %11, %arg0 : f32, !llvm.ptr + omp.yield + } + } + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir index 83a0990d63162..f8c9d911e3034 100644 --- a/mlir/test/Target/LLVMIR/openmp-todo.mlir +++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir @@ -156,25 +156,6 @@ llvm.func @simd_nontemporal(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) { // ----- -omp.private {type = private} @x.privatizer : !llvm.ptr alloc { -^bb0(%arg0: !llvm.ptr): - %0 = llvm.mlir.constant(1 : i32) : i32 - %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr - omp.yield(%1 : !llvm.ptr) -} -llvm.func @simd_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) { - // expected-error@below {{not yet implemented: Unhandled clause privatization in omp.simd operation}} - // expected-error@below {{LLVM Translation failed for operation: omp.simd}} - omp.simd private(@x.privatizer %x -> %arg0 : !llvm.ptr) { - omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { - omp.yield - } - } - llvm.return -} - -// ----- - omp.declare_reduction @add_f32 : f32 init { ^bb0(%arg: f32):