Skip to content

Add cmake opencv 4 support #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ if(POLICY CMP0054)
cmake_policy(SET CMP0054 NEW)
endif()

set(CMAKE_INSTALL_PREFIX /usr/local/include/caffe)

# ---[ Caffe project
project(Caffe C CXX)

Expand All @@ -32,7 +34,7 @@ caffe_option(USE_CUDNN "Build Caffe with cuDNN library support" ON IF NOT CPU_ON
caffe_option(USE_NCCL "Build Caffe with NCCL library support" OFF)
caffe_option(BUILD_SHARED_LIBS "Build shared libraries" ON)
caffe_option(BUILD_python "Build Python wrapper" ON)
set(python_version "2" CACHE STRING "Specify which Python version to use")
set(python_version "3" CACHE STRING "Specify which Python version to use")
caffe_option(BUILD_matlab "Build Matlab wrapper" OFF IF UNIX OR APPLE)
caffe_option(BUILD_docs "Build documentation" ON IF UNIX OR APPLE)
caffe_option(BUILD_python_layer "Build the Caffe Python layer" ON)
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ CXXFLAGS += -MMD -MP
# Complete build flags.
COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
CXXFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS)
NVCCFLAGS += -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
NVCCFLAGS += -D_FORCE_INLINES -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
# mex may invoke an older gcc that is too liberal with -Wuninitialized
MATLAB_CXXFLAGS := $(CXXFLAGS) -Wno-uninitialized
LINKFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS)
Expand Down
18 changes: 12 additions & 6 deletions cmake/Cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,25 @@ endif ()
# set(Caffe_known_gpu_archs "30 35 50 52 60 61")
# set(Caffe_known_gpu_archs "20 21(20) 30 35 50 60 61")
# Fermi (3.2 <= CUDA <= 8)
set(FERMI "20 21(20)")
# set(FERMI "20 21(20)")
# Kepler (CUDA >= 5)
set(KEPLER "30 35 37")
set(KEPLER "35 37") # set(KEPLER "30 35 37") # This crashes with CUDA 10
# Maxwell (CUDA >= 6)
set(MAXWELL "50 52 53")
# Pascal (CUDA >= 8)
set(PASCAL "60 61 62")
# Volta (CUDA >= 9)
set(VOLTA "70") # set(VOLTA "70 71 72") # This crashes with CUDA 10
set(VOLTA "70 72") # set(VOLTA "70 71 72") # This crashes with CUDA 10
# Turing (CUDA >= 10)
set(TURING "75")
# Ampere (CUDA >= 11)
set(AMPERE "80 86")
if (UNIX AND NOT APPLE)
set(Caffe_known_gpu_archs "${KEPLER} ${MAXWELL} ${PASCAL} ${VOLTA} ${TURING}")
set(Caffe_known_gpu_archs "${KEPLER} ${MAXWELL} ${PASCAL} ${VOLTA} ${TURING} ${AMPERE}")
# set(Caffe_known_gpu_archs "${FERMI} ${KEPLER} ${MAXWELL} ${PASCAL} ${VOLTA} ${TURING}")
# set(Caffe_known_gpu_archs "20 21(20) 30 35 50 52 60 61")
elseif (WIN32)
set(Caffe_known_gpu_archs "${KEPLER} ${MAXWELL} ${PASCAL} ${VOLTA} ${TURING}")
set(Caffe_known_gpu_archs "${KEPLER} ${MAXWELL} ${PASCAL} ${VOLTA} ${TURING} ${AMPERE}")
endif ()


Expand Down Expand Up @@ -227,7 +229,11 @@ function(detect_cuDNN)
set(HAVE_CUDNN TRUE PARENT_SCOPE)
set(CUDNN_FOUND TRUE PARENT_SCOPE)

file(READ ${CUDNN_INCLUDE}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
if(EXISTS "${CUDNN_INCLUDE}/cudnn_version.h")
file(READ ${CUDNN_INCLUDE}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS)
else()
file(READ ${CUDNN_INCLUDE}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
endif()

# cuDNN v3 and beyond
string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
Expand Down
2 changes: 2 additions & 0 deletions examples/cpp_classification/classification.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "../../include/caffe/util/opencv4.hpp"
#endif // USE_OPENCV

#include <algorithm>
#include <iosfwd>
#include <memory>
Expand Down
1 change: 1 addition & 0 deletions include/caffe/data_transformer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class DataTransformer {
Blob<Dtype>* transformed_blob);

#ifdef USE_OPENCV
#include "./util/opencv4.hpp"
/**
* @brief Applies the transformation defined in the data layer's
* transform_param block to a vector of Mat.
Expand Down
2 changes: 2 additions & 0 deletions include/caffe/layers/memory_data_layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ class MemoryDataLayer : public BaseDataLayer<Dtype> {
#ifdef USE_OPENCV
virtual void AddMatVector(const vector<cv::Mat>& mat_vector,
const vector<int>& labels);

#include "../util/opencv4.hpp"
#endif // USE_OPENCV

// Reset should accept const pointers, but can't, because the memory
Expand Down
1 change: 1 addition & 0 deletions include/caffe/util/io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ bool DecodeDatumNative(Datum* datum);
bool DecodeDatum(Datum* datum, bool is_color);

#ifdef USE_OPENCV
#include "./opencv4.hpp"
cv::Mat ReadImageToCVMat(const string& filename,
const int height, const int width, const bool is_color);

Expand Down
10 changes: 10 additions & 0 deletions include/caffe/util/opencv4.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Compatibility shim for OpenCV 4.
//
// OpenCV 4 removed the legacy C-API constants CV_LOAD_IMAGE_COLOR and
// CV_LOAD_IMAGE_GRAYSCALE that Caffe's image-loading code uses.  This
// header maps those names onto the cv::ImreadModes enum values so the
// same call sites compile against both OpenCV 3 and OpenCV 4.
//
// Include guard added: this header is pulled in from several other
// headers (io.hpp, data_transformer.hpp, ...), so it must be safe to
// include more than once in a translation unit.
#ifndef CAFFE_UTIL_OPENCV4_HPP_
#define CAFFE_UTIL_OPENCV4_HPP_

#include <opencv2/opencv.hpp>

// CV_MAJOR_VERSION is defined by the OpenCV headers included above.
#if (defined(CV_MAJOR_VERSION) && CV_MAJOR_VERSION > 3)
#define OPENCV_VERSION4
#endif

#ifdef OPENCV_VERSION4
#define CV_LOAD_IMAGE_COLOR cv::IMREAD_COLOR
#define CV_LOAD_IMAGE_GRAYSCALE cv::IMREAD_GRAYSCALE
#endif

#endif  // CAFFE_UTIL_OPENCV4_HPP_
1 change: 1 addition & 0 deletions src/caffe/data_transformer.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#include "../../include/caffe/util/opencv4.hpp"
#endif // USE_OPENCV

#include <string>
Expand Down
58 changes: 57 additions & 1 deletion src/caffe/layers/cudnn_conv_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,21 @@ void CuDNNConvolutionLayer<Dtype>::Reshape(
const int stride_h = stride_data[0];
const int stride_w = stride_data[1];

// Note: Copied from https://github.com/Qengineering/caffe/tree/ssd/src/caffe/layers
#if CUDNN_VERSION_MIN(8, 0, 0)
int RetCnt;
bool found_conv_algorithm;
size_t free_memory, total_memory;
cudnnConvolutionFwdAlgoPerf_t fwd_algo_pref_[4];
cudnnConvolutionBwdDataAlgoPerf_t bwd_data_algo_pref_[4];

//get memory sizes
cudaMemGetInfo(&free_memory, &total_memory);
#else
// Specify workspace limit for kernels directly until we have a
// planning strategy and a rewrite of Caffe's GPU memory management
size_t workspace_limit_bytes = 8*1024*1024;
#endif

for (int i = 0; i < bottom.size(); i++) {
cudnn::setTensor4dDesc<Dtype>(&bottom_descs_[i],
Expand All @@ -127,6 +139,50 @@ void CuDNNConvolutionLayer<Dtype>::Reshape(
filter_desc_, pad_h, pad_w,
stride_h, stride_w);

// Note: Copied from https://github.com/Qengineering/caffe/tree/ssd/src/caffe/layers
#if CUDNN_VERSION_MIN(8, 0, 0)
// choose forward algorithm for filter
// CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED is not implemented for the forward pass in cuDNN 8, so skip it
CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm_v7(handle_[0], bottom_descs_[i], filter_desc_, conv_descs_[i], top_descs_[i], 4, &RetCnt, fwd_algo_pref_));

found_conv_algorithm = false;
for(int n=0;n<RetCnt;n++){
if (fwd_algo_pref_[n].status == CUDNN_STATUS_SUCCESS &&
fwd_algo_pref_[n].algo != CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
fwd_algo_pref_[n].memory < free_memory){
found_conv_algorithm = true;
fwd_algo_[i] = fwd_algo_pref_[n].algo;
workspace_fwd_sizes_[i] = fwd_algo_pref_[n].memory;
break;
}
}
if(!found_conv_algorithm) LOG(ERROR) << "cuDNN did not return a suitable algorithm for convolution.";
else{
// choose backward algorithm for filter
// for better or worse, just a fixed constant due to the missing
// cudnnGetConvolutionBackwardFilterAlgorithm in cuDNN version 8.0
bwd_filter_algo_[i] = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1;
// twice the amount of the forward search to be safe
workspace_bwd_filter_sizes_[i] = 2*workspace_fwd_sizes_[i];
}

// choose backward algo for data
CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm_v7(handle_[0], filter_desc_, top_descs_[i], conv_descs_[i], bottom_descs_[i], 4, &RetCnt, bwd_data_algo_pref_));

found_conv_algorithm = false;
for(int n=0;n<RetCnt;n++){
if (bwd_data_algo_pref_[n].status == CUDNN_STATUS_SUCCESS &&
bwd_data_algo_pref_[n].algo != CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD &&
bwd_data_algo_pref_[n].algo != CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED &&
bwd_data_algo_pref_[n].memory < free_memory){
found_conv_algorithm = true;
bwd_data_algo_[i] = bwd_data_algo_pref_[n].algo;
workspace_bwd_data_sizes_[i] = bwd_data_algo_pref_[n].memory;
break;
}
}
if(!found_conv_algorithm) LOG(ERROR) << "cuDNN did not return a suitable algorithm for convolution.";
#else
// choose forward and backward algorithms + workspace(s)
CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(handle_[0],
bottom_descs_[i],
Expand Down Expand Up @@ -166,6 +222,7 @@ void CuDNNConvolutionLayer<Dtype>::Reshape(
CUDNN_CHECK(cudnnGetConvolutionBackwardDataWorkspaceSize(handle_[0],
filter_desc_, top_descs_[i], conv_descs_[i], bottom_descs_[i],
bwd_data_algo_[i], &workspace_bwd_data_sizes_[i]) );
#endif
}

// reduce over all workspace sizes to get a maximum to allocate / reallocate
Expand Down Expand Up @@ -252,7 +309,6 @@ CuDNNConvolutionLayer<Dtype>::~CuDNNConvolutionLayer() {
}

cudaFree(workspaceData);
delete [] workspace;
delete [] stream_;
delete [] handle_;
delete [] fwd_algo_;
Expand Down
Loading