Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ if(WITH_GPU)
endif(WITH_GPU)

# When NNPACK support is enabled, add its libraries to EXTERNAL_LIBS.
if(USE_NNPACK)
# NOTE(review): the hunk summary reports 2 additions and 1 deletion, so this
# line is presumably the removed pre-change form — confirm against the raw diff.
list(APPEND EXTERNAL_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB} "rt")
# Load the external/nnpack module, then append NNPACK_LIBS
# (presumably populated by that module — confirm).
include(external/nnpack)
list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS})
endif(USE_NNPACK)

add_subdirectory(proto)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,24 @@ set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK")
# Locate the NNPACK header directory and the libraries to link, passing
# ${NNPACK_ROOT}/include and ${NNPACK_ROOT}/lib as additional search paths.
find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include)
find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib)
find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib)
# The next two lookups are optional: their results are only appended to
# NNPACK_LIBS when they are found.
find_library(NNPACK_UKERNELS_LIB NAMES nnpack_ukernels PATHS ${NNPACK_ROOT}/lib)
find_library(NNPACK_CPUFEATURES_LIB NAMES cpufeatures PATHS ${NNPACK_ROOT}/lib)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cpufeatures is a library from the NDK and is specific to Android. Just a reminder that we may find a better way to locate it in the future.


if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB)
set(NNPACK_FOUND ON)
INCLUDE_DIRECTORIES(${NNPACK_INC_DIR})

set(NNPACK_LIBS)
list(APPEND NNPACK_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB})
if (NNPACK_UKERNELS_LIB)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In which cases is nnpack_ukernels present, and in which cases is it not?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On Android, the NNPACK build generates the libnnpack_ukernels.a file. Otherwise there is no such file; all *.o files are packaged into libnnpack.a.

list(APPEND NNPACK_LIBS ${NNPACK_UKERNELS_LIB})
endif()
# Link cpufeatures only when find_library located it.
if (NNPACK_CPUFEATURES_LIB)
list(APPEND NNPACK_LIBS ${NNPACK_CPUFEATURES_LIB})
endif()
# "rt" is added on every platform except Android.
# NOTE(review): presumably Android has no separate librt — confirm.
if(NOT ANDROID)
list(APPEND NNPACK_LIBS "rt")
endif()
else()
message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})")
endif()
1 change: 0 additions & 1 deletion paddle/function/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ if(WITH_GPU)
endif()

if(USE_NNPACK)
include(nnpack/nnpack.cmake)
list(APPEND cpp_files nnpack/NNPACKConvOp.cpp)
if(WITH_TESTING)
add_unittest(NNPACKConvOpTest nnpack/NNPACKConvOpTest.cpp)
Expand Down
25 changes: 14 additions & 11 deletions paddle/function/nnpack/NNPACKConvOp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/function/ConvOp.h"

DEFINE_bool(nnpack_allocate_outside,
false,
true,
"Allocate and free workspace memory outside the NNPACK interface.");
DEFINE_int32(nnpack_num_threads,
0,
Expand Down Expand Up @@ -58,18 +58,10 @@ class NNPACKConvFunction : public ConvFunctionBase {
workspaceBuffer_ = nullptr;
workspaceSize_ = 0;

threadpool_ = nullptr;
if (FLAGS_nnpack_num_threads) {
threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads);
VLOG(3) << "Number of threads "
<< pthreadpool_get_threads_count(threadpool_);
}
create_nnpack_threadpool();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This way, we cannot destroy threadpool_ explicitly and have to leave the cleanup to the OS?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this may not be ideal. However, in the previous version each NNPACKConvFunction object had its own threadpool_, which was a bug: at runtime it led to the creation of many threads. So I changed threadpool_ to a static variable.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can define threadpool like this:

template <DeviceType Device>
class NNPACKConvFunction : public ConvFunctionBase {
  ...
private:
  // Owns a pthreadpool handle and destroys it when the owner is destructed.
  struct ThreadPool {
    ThreadPool() : threadpool_(nullptr) {}
    ~ThreadPool() {
      if (threadpool_) {
        pthreadpool_destroy(threadpool_);
      }
    }

    // The destructor frees the handle, so a copy would free it a second
    // time; forbid copying altogether.
    ThreadPool(const ThreadPool&) = delete;
    ThreadPool& operator=(const ThreadPool&) = delete;

    // Creates the pool on the first call with a positive num_threads; later
    // calls, and calls with num_threads <= 0, leave the handle untouched.
    void create(int num_threads) {
      if (num_threads > 0 && threadpool_ == nullptr) {
        threadpool_ = pthreadpool_create(num_threads);
        // Only query the pool if creation actually produced a handle.
        if (threadpool_ != nullptr) {
          VLOG(3) << "Number of threads "
                  << pthreadpool_get_threads_count(threadpool_);
        }
      }
    }

    // nullptr until create() succeeds.
    pthreadpool_t threadpool_;
  };

private:
  static ThreadPool pool_;
};

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is better. Can you fix it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK.

}

~NNPACKConvFunction() {
if (threadpool_) {
pthreadpool_destroy(threadpool_);
}
if (workspaceBuffer_) {
free(workspaceBuffer_);
}
Expand Down Expand Up @@ -225,14 +217,25 @@ class NNPACKConvFunction : public ConvFunctionBase {
}
}

// Creates the shared NNPACK thread pool on first use, sized by
// FLAGS_nnpack_num_threads. Does nothing if the flag is not positive or a
// pool already exists.
// NOTE(review): the check-then-create sequence is not synchronized — confirm
// that function objects are never constructed concurrently.
static void create_nnpack_threadpool() {
  // Require a strictly positive count: a negative flag value would be
  // converted to a huge unsigned thread count by pthreadpool_create.
  if (FLAGS_nnpack_num_threads > 0 && threadpool_ == nullptr) {
    threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads);
    // Only query the pool if creation actually produced a handle.
    if (threadpool_ != nullptr) {
      VLOG(3) << "Number of threads "
              << pthreadpool_get_threads_count(threadpool_);
    }
  }
}

private:
nnp_convolution_algorithm algorithm_;
nnp_convolution_transform_strategy transform_strategy_;
void* workspaceBuffer_;
size_t workspaceSize_;
pthreadpool_t threadpool_;
static pthreadpool_t threadpool_;
};

// Out-of-class definition of the static thread-pool handle; it starts as
// nullptr for every Device instantiation of NNPACKConvFunction.
template <DeviceType Device>
pthreadpool_t NNPACKConvFunction<Device>::threadpool_ = nullptr;

REGISTER_TYPED_FUNC(NNPACKConv, CPU, NNPACKConvFunction);

} // namespace paddle