@@ -23,12 +23,12 @@ namespace paddle {
2323namespace platform {
2424namespace dynload {
2525
26- std::once_flag cudnn_dso_flag;
27- void * cudnn_dso_handle = nullptr ;
26+ extern std::once_flag cudnn_dso_flag;
27+ extern void * cudnn_dso_handle;
2828
2929#ifdef PADDLE_USE_DSO
3030
31- #define DYNAMIC_LOAD_CUDNN_WRAP (__name ) \
31+ #define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP (__name ) \
3232 struct DynLoad__ ##__name { \
3333 template <typename ... Args> \
3434 auto operator ()(Args... args) -> decltype(__name(args...)) { \
@@ -39,98 +39,93 @@ void* cudnn_dso_handle = nullptr;
3939 void * p_##__name = dlsym (cudnn_dso_handle, #__name); \
4040 return reinterpret_cast <cudnn_func>(p_##__name)(args...); \
4141 } \
42- } __name; /* struct DynLoad__##__name */
42+ }; \
43+ extern struct DynLoad__ ##__name __name
4344
4445#else
4546
46- #define DYNAMIC_LOAD_CUDNN_WRAP (__name ) \
47+ #define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP (__name ) \
4748 struct DynLoad__ ##__name { \
4849 template <typename ... Args> \
4950 auto operator ()(Args... args) -> decltype(__name(args...)) { \
5051 return __name (args...); \
5152 } \
52- } __name; /* struct DynLoad__##__name */
53+ }; \
54+ extern DynLoad__##__name __name
5355
5456#endif
5557
5658/* *
5759 * Include all needed cuDNN functions in HPPL.
5860 * Different cuDNN versions have different interfaces.
5961 **/
60- // clang-format off
61- #define CUDNN_DNN_ROUTINE_EACH (__macro ) \
62- __macro (cudnnSetTensor4dDescriptor) \
63- __macro (cudnnSetTensor4dDescriptorEx) \
64- __macro (cudnnGetConvolutionNdForwardOutputDim) \
65- __macro (cudnnGetConvolutionForwardAlgorithm) \
66- __macro (cudnnCreateTensorDescriptor) \
67- __macro (cudnnDestroyTensorDescriptor) \
68- __macro (cudnnCreateFilterDescriptor) \
69- __macro (cudnnSetFilter4dDescriptor) \
70- __macro (cudnnSetPooling2dDescriptor) \
71- __macro (cudnnDestroyFilterDescriptor) \
72- __macro (cudnnCreateConvolutionDescriptor) \
73- __macro (cudnnCreatePoolingDescriptor) \
74- __macro (cudnnDestroyPoolingDescriptor) \
75- __macro (cudnnSetConvolution2dDescriptor) \
76- __macro (cudnnDestroyConvolutionDescriptor) \
77- __macro (cudnnCreate) \
78- __macro (cudnnDestroy) \
79- __macro (cudnnSetStream) \
80- __macro (cudnnActivationForward) \
81- __macro (cudnnConvolutionForward) \
82- __macro (cudnnConvolutionBackwardBias) \
83- __macro (cudnnGetConvolutionForwardWorkspaceSize) \
84- __macro (cudnnTransformTensor) \
85- __macro (cudnnPoolingForward) \
86- __macro (cudnnPoolingBackward) \
87- __macro (cudnnSoftmaxBackward) \
88- __macro (cudnnSoftmaxForward) \
89- __macro (cudnnGetVersion) \
90- __macro (cudnnGetErrorString)
91- CUDNN_DNN_ROUTINE_EACH (DYNAMIC_LOAD_CUDNN_WRAP)
92-
93- #define CUDNN_DNN_ROUTINE_EACH_R2 (__macro ) \
94- __macro (cudnnAddTensor) \
95- __macro (cudnnConvolutionBackwardData) \
96- __macro (cudnnConvolutionBackwardFilter)
97- CUDNN_DNN_ROUTINE_EACH_R2 (DYNAMIC_LOAD_CUDNN_WRAP)
62+ #define CUDNN_DNN_ROUTINE_EACH (__macro ) \
63+ __macro (cudnnSetTensor4dDescriptor); \
64+ __macro (cudnnSetTensor4dDescriptorEx); \
65+ __macro (cudnnGetConvolutionNdForwardOutputDim); \
66+ __macro (cudnnGetConvolutionForwardAlgorithm); \
67+ __macro (cudnnCreateTensorDescriptor); \
68+ __macro (cudnnDestroyTensorDescriptor); \
69+ __macro (cudnnCreateFilterDescriptor); \
70+ __macro (cudnnSetFilter4dDescriptor); \
71+ __macro (cudnnSetPooling2dDescriptor); \
72+ __macro (cudnnDestroyFilterDescriptor); \
73+ __macro (cudnnCreateConvolutionDescriptor); \
74+ __macro (cudnnCreatePoolingDescriptor); \
75+ __macro (cudnnDestroyPoolingDescriptor); \
76+ __macro (cudnnSetConvolution2dDescriptor); \
77+ __macro (cudnnDestroyConvolutionDescriptor); \
78+ __macro (cudnnCreate); \
79+ __macro (cudnnDestroy); \
80+ __macro (cudnnSetStream); \
81+ __macro (cudnnActivationForward); \
82+ __macro (cudnnConvolutionForward); \
83+ __macro (cudnnConvolutionBackwardBias); \
84+ __macro (cudnnGetConvolutionForwardWorkspaceSize); \
85+ __macro (cudnnTransformTensor); \
86+ __macro (cudnnPoolingForward); \
87+ __macro (cudnnPoolingBackward); \
88+ __macro (cudnnSoftmaxBackward); \
89+ __macro (cudnnSoftmaxForward); \
90+ __macro (cudnnGetVersion); \
91+ __macro (cudnnGetErrorString);
92+ CUDNN_DNN_ROUTINE_EACH (DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
93+
94+ #define CUDNN_DNN_ROUTINE_EACH_R2 (__macro ) \
95+ __macro (cudnnAddTensor); \
96+ __macro (cudnnConvolutionBackwardData); \
97+ __macro (cudnnConvolutionBackwardFilter);
98+ CUDNN_DNN_ROUTINE_EACH_R2 (DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
9899
99100// APIs available after R3:
100101#if CUDNN_VERSION >= 3000
101- #define CUDNN_DNN_ROUTINE_EACH_AFTER_R3 (__macro ) \
102- __macro (cudnnGetConvolutionBackwardFilterWorkspaceSize) \
103- __macro(cudnnGetConvolutionBackwardDataAlgorithm) \
104- __macro(cudnnGetConvolutionBackwardFilterAlgorithm) \
105- __macro(cudnnGetConvolutionBackwardDataWorkspaceSize)
106- CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DYNAMIC_LOAD_CUDNN_WRAP)
107- #undef CUDNN_DNN_ROUTINE_EACH_AFTER_R3
102+ #define CUDNN_DNN_ROUTINE_EACH_AFTER_R3 (__macro ) \
103+ __macro (cudnnGetConvolutionBackwardFilterWorkspaceSize); \
104+ __macro (cudnnGetConvolutionBackwardDataAlgorithm); \
105+ __macro (cudnnGetConvolutionBackwardFilterAlgorithm); \
106+ __macro (cudnnGetConvolutionBackwardDataWorkspaceSize);
107+ CUDNN_DNN_ROUTINE_EACH_AFTER_R3 (DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
108108#endif
109109
110-
111110// APIs available after R4:
112111#if CUDNN_VERSION >= 4007
113- #define CUDNN_DNN_ROUTINE_EACH_AFTER_R4 (__macro ) \
114- __macro (cudnnBatchNormalizationForwardTraining) \
115- __macro(cudnnBatchNormalizationForwardInference) \
116- __macro(cudnnBatchNormalizationBackward)
117- CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DYNAMIC_LOAD_CUDNN_WRAP)
118- #undef CUDNN_DNN_ROUTINE_EACH_AFTER_R4
112+ #define CUDNN_DNN_ROUTINE_EACH_AFTER_R4 (__macro ) \
113+ __macro (cudnnBatchNormalizationForwardTraining); \
114+ __macro (cudnnBatchNormalizationForwardInference); \
115+ __macro (cudnnBatchNormalizationBackward);
116+ CUDNN_DNN_ROUTINE_EACH_AFTER_R4 (DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
119117#endif
120118
121119// APIs in R5
122120#if CUDNN_VERSION >= 5000
123- #define CUDNN_DNN_ROUTINE_EACH_R5 (__macro ) \
124- __macro (cudnnCreateActivationDescriptor) \
125- __macro(cudnnSetActivationDescriptor) \
126- __macro(cudnnGetActivationDescriptor) \
127- __macro(cudnnDestroyActivationDescriptor)
128- CUDNN_DNN_ROUTINE_EACH_R5(DYNAMIC_LOAD_CUDNN_WRAP)
129- #undef CUDNN_DNN_ROUTINE_EACH_R5
121+ #define CUDNN_DNN_ROUTINE_EACH_R5 (__macro ) \
122+ __macro (cudnnCreateActivationDescriptor); \
123+ __macro (cudnnSetActivationDescriptor); \
124+ __macro (cudnnGetActivationDescriptor); \
125+ __macro (cudnnDestroyActivationDescriptor);
126+ CUDNN_DNN_ROUTINE_EACH_R5 (DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
130127#endif
131128
132- #undef CUDNN_DNN_ROUTINE_EACH
133- // clang-format on
134129} // namespace dynload
135130} // namespace platform
136131} // namespace paddle
0 commit comments