@@ -551,9 +551,10 @@ CGOpenMPRuntimeGPU::getExecutionMode() const {
   return CurrentExecutionMode;
 }
 
-CGOpenMPRuntimeGPU::DataSharingMode
-CGOpenMPRuntimeGPU::getDataSharingMode() const {
-  return CurrentDataSharingMode;
+static CGOpenMPRuntimeGPU::DataSharingMode
+getDataSharingMode(CodeGenModule &CGM) {
+  return CGM.getLangOpts().OpenMPCUDAMode ? CGOpenMPRuntimeGPU::CUDA
+                                          : CGOpenMPRuntimeGPU::Generic;
 }
 
 /// Check for inner (nested) SPMD construct, if any
@@ -751,9 +752,6 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
   EntryFunctionState EST;
   WrapperFunctionsMap.clear();
 
-  [[maybe_unused]] bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
-  assert(!IsBareKernel && "bare kernel should not be at generic mode");
-
   // Emit target region as a standalone region.
   class NVPTXPrePostActionTy : public PrePostActionTy {
     CGOpenMPRuntimeGPU::EntryFunctionState &EST;
@@ -762,13 +760,15 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
     NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST)
         : EST(EST) {}
     void Enter(CodeGenFunction &CGF) override {
-      auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
+      auto &RT =
+          static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
       RT.emitKernelInit(CGF, EST, /* IsSPMD */ false);
       // Skip target region initialization.
       RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
     }
     void Exit(CodeGenFunction &CGF) override {
-      auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
+      auto &RT =
+          static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
       RT.clearLocThreadIdInsertPt(CGF);
       RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ false);
     }
@@ -807,39 +807,25 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
   ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD);
   EntryFunctionState EST;
 
-  bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
-
   // Emit target region as a standalone region.
   class NVPTXPrePostActionTy : public PrePostActionTy {
     CGOpenMPRuntimeGPU &RT;
     CGOpenMPRuntimeGPU::EntryFunctionState &EST;
-    bool IsBareKernel;
-    DataSharingMode Mode;
 
   public:
     NVPTXPrePostActionTy(CGOpenMPRuntimeGPU &RT,
-                         CGOpenMPRuntimeGPU::EntryFunctionState &EST,
-                         bool IsBareKernel)
-        : RT(RT), EST(EST), IsBareKernel(IsBareKernel),
-          Mode(RT.CurrentDataSharingMode) {}
+                         CGOpenMPRuntimeGPU::EntryFunctionState &EST)
+        : RT(RT), EST(EST) {}
     void Enter(CodeGenFunction &CGF) override {
-      if (IsBareKernel) {
-        RT.CurrentDataSharingMode = DataSharingMode::DS_CUDA;
-        return;
-      }
       RT.emitKernelInit(CGF, EST, /* IsSPMD */ true);
       // Skip target region initialization.
       RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
     }
     void Exit(CodeGenFunction &CGF) override {
-      if (IsBareKernel) {
-        RT.CurrentDataSharingMode = Mode;
-        return;
-      }
       RT.clearLocThreadIdInsertPt(CGF);
       RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ true);
     }
-  } Action(*this, EST, IsBareKernel);
+  } Action(*this, EST);
   CodeGen.setAction(Action);
   IsInTTDRegion = true;
   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
@@ -857,8 +843,7 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
   assert(!ParentName.empty() && "Invalid target region parent name!");
 
   bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
-  bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
-  if (Mode || IsBareKernel)
+  if (Mode)
     emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
                    CodeGen);
   else
@@ -882,9 +867,6 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
   if (CGM.getLangOpts().NoGPULib || CGM.getLangOpts().OMPHostIRFile.empty())
     return;
 
-  if (CGM.getLangOpts().OpenMPCUDAMode)
-    CurrentDataSharingMode = CGOpenMPRuntimeGPU::DS_CUDA;
-
   OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTargetDebug,
                               "__omp_rtl_debug_kind");
   OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTeamSubscription,
@@ -1048,7 +1030,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
 void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool WithSPMDCheck) {
-  if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic &&
+  if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
       getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
     return;
 
@@ -1160,7 +1142,7 @@ void CGOpenMPRuntimeGPU::getKmpcFreeShared(
 
 void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF,
                                                bool WithSPMDCheck) {
-  if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic &&
+  if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
       getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
     return;
 
@@ -1196,18 +1178,11 @@ void CGOpenMPRuntimeGPU::emitTeamsCall(CodeGenFunction &CGF,
   if (!CGF.HaveInsertPoint())
     return;
 
-  bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
-
   Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                       /*Name=*/".zero.addr");
   CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);
   llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
-  // We don't emit any thread id function call in bare kernel, but because the
-  // outlined function has a pointer argument, we emit a nullptr here.
-  if (IsBareKernel)
-    OutlinedFnArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
-  else
-    OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
+  OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
   OutlinedFnArgs.push_back(ZeroAddr.getPointer());
   OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
   emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -3298,7 +3273,7 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
 
 void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
                                             const Decl *D) {
-  if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
+  if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
     return;
 
   assert(D && "Expected function or captured|block decl.");
@@ -3407,7 +3382,7 @@ Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF,
                    VarTy, Align);
   }
 
-  if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
+  if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
     return Address::invalid();
 
   VD = VD->getCanonicalDecl();
0 commit comments