Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
211f83f
set depthwise conv layer interface in python
NHZlX Jul 4, 2017
eeb17c2
add depthwise operation and depthwise conv layer
NHZlX Jul 4, 2017
efae51c
add the mobilenet gpu acceleration, cpu is in the process
NHZlX Jul 7, 2017
f4e7ae5
add mobilenet gpu grad test, the test is ok
NHZlX Jul 7, 2017
36e7800
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 7, 2017
064dc88
add the comments for .h file and code tiny modify
NHZlX Jul 10, 2017
198164a
use the expandconvlayer forward and backward, add the explain for class
NHZlX Jul 10, 2017
a3ce6aa
add depthwise conv test
NHZlX Jul 10, 2017
e92f002
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 10, 2017
fd4b113
move DepthwiseConvOpTest.cpp to ConvOpTest.cpp
NHZlX Jul 12, 2017
433935a
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 12, 2017
2bc08f8
modify format accored with clang-format 3.8
NHZlX Jul 12, 2017
ccd46d1
modify format accored with clang-format 3.8
NHZlX Jul 12, 2017
030a3db
the groups default should be None
NHZlX Jul 12, 2017
fc8aedb
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 12, 2017
c43f693
modify the format and delete useless comment
NHZlX Jul 14, 2017
6267312
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 14, 2017
02e04b4
fuse the conv and depthwise conv together
NHZlX Jul 18, 2017
11588b3
support inputchannels != outputchannels of depthwiseconv
NHZlX Jul 18, 2017
d43fbba
add comments for python api
NHZlX Jul 18, 2017
44927bf
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 18, 2017
dbb6588
modity the format
NHZlX Jul 18, 2017
66520af
accelerate inputbackward(delete 'if' in this func) of depthwise conv
NHZlX Jul 19, 2017
d50c71f
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 19, 2017
f7390d1
delete useless .h header in DepthwiseConvOpGpu.cu
NHZlX Jul 19, 2017
21ab0eb
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 19, 2017
77ff97a
fuse interface of depthwise to expand in python api
NHZlX Jul 19, 2017
8199886
fuse interface of depthwise to expandconv
NHZlX Jul 19, 2017
1f516fa
modify format, and modify the layer grad test, op test
NHZlX Jul 19, 2017
bd54eb9
tiny modify the test
NHZlX Jul 19, 2017
4d6be97
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 20, 2017
5b07d4e
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 20, 2017
248149f
add depthwiseconv test and fix the little bug of the convOpTest
NHZlX Jul 20, 2017
d5b0c57
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 20, 2017
cfd4c05
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 21, 2017
e8d171b
add check for groups and inputChannels
NHZlX Jul 21, 2017
6c528cb
add check: CHECK_EQ(outputs[0].getArgType(), ADD_TO)
NHZlX Jul 21, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions paddle/function/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ if(WITH_GPU)
add_simple_unittest(MulOpTest)
add_simple_unittest(CosSimOpTest)
add_simple_unittest(RowConvOpTest)
add_simple_unittest(DepthwiseConvOpTest)
endif()

add_simple_unittest(ConvOpTest)
Expand Down
295 changes: 295 additions & 0 deletions paddle/function/DepthwiseConvOp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,295 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "DepthwiseConvOp.h"
#include "ConvOp.h"
#include "GemmFunctor.h"
//#include "paddle/math/MemoryHandle.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

去掉无用的代码。


namespace paddle {

/*
 * \brief CPU specialization of the depthwise convolution forward functor.
 *
 * The call operator has an empty body: invoking it performs no computation
 * and never writes to outputData. The CPU implementation is still a TODO.
 *
 * The argument list is the one DepthwiseConvFunction::calc passes: input and
 * filter buffers, batch size, output channels/height/width, input
 * height/width, filter height/width, strides, paddings and the output buffer.
 * Note that no input-channel count is part of this signature.
 */
template <class T>
class DepthwiseConvFunctor<DEVICE_TYPE_CPU, T> {
public:
void operator()(const T* inputData,
const T* filterData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputHeight,
int inputWidth,
int filterHeight,
int filterWidth,
int strideH,
int strideW,
int paddingH,
int paddingW,
T* outputData) {
// TODO(zhaolong) : cpu implementation of depthwise convolution
}
};

/*
 * \brief CPU specialization of the depthwise convolution input-gradient
 *        functor.
 *
 * The call operator has an empty body: invoking it performs no computation
 * and never writes to inputGrad.
 *
 * The argument list is the one DepthwiseConvGradInputFunction::calc passes:
 * output-gradient and filter buffers, batch size, output
 * channels/height/width, input channels/height/width, filter height/width,
 * strides, paddings and the input-gradient buffer.
 */
template <class T>
class DepthwiseConvGradInputFunctor<DEVICE_TYPE_CPU, T> {
public:
void operator()(const T* outputGrad,
const T* filterData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterHeight,
int filterWidth,
int strideH,
int strideW,
int paddingH,
int paddingW,
T* inputGrad) {}
};

/*
 * \brief CPU specialization of the depthwise convolution filter-gradient
 *        functor.
 *
 * The call operator has an empty body: invoking it performs no computation
 * and never writes to colData or filterGrad.
 *
 * The argument list is the one DepthwiseConvGradFilterFunction::calc passes:
 * output-gradient and input buffers, batch size, output
 * channels/height/width, input channels/height/width, filter height/width,
 * strides, paddings, a caller-provided scratch buffer (colData) and the
 * filter-gradient buffer.
 */
template <class T>
class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_CPU, T> {
public:
void operator()(const T* outputGrad,
const T* inputData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterHeight,
int filterWidth,
int strideH,
int strideW,
int paddingH,
int paddingW,
T* colData,
T* filterGrad) {}
};

/*
 * \brief Forward calculation of depthwise convolution.
 *
 * Argument layout:
 *   inputs[0]  - input tensor
 *   inputs[1]  - filter
 *   outputs[0] - output tensor
 *
 * Dimension 0 of the input is read as the batch size, dimension 1 of the
 * output as the channel count, and dimensions 2 and 3 as height and width.
 * The computation itself is delegated to DepthwiseConvFunctor<Device, real>.
 */
template <DeviceType Device>
class DepthwiseConvFunction : public ConvFunctionBase {
public:
  void init(const FuncConfig& config) override {
    ConvFunctionBase::init(config);
  }

  // Validates the (input, filter, output) shapes via checkShape.
  void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
    checkShape(inputs[0].shape(), inputs[1].shape(), outputs[0].shape());
  }

  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    CHECK_EQ(numInputs_, inputs.size());
    CHECK_EQ(numOutputs_, outputs.size());
    check(inputs, outputs);
    // NOTE(review): a reviewer asked for
    // CHECK_EQ(outputs[0].getArgType(), ADD_TO) here. It is not added because
    // this file does not show whether the forward functor accumulates into
    // or overwrites outputData - confirm against the device implementation.

    real* inputData = inputs[0].data<real>();
    real* filterData = inputs[1].data<real>();
    real* outputData = outputs[0].data<real>();

    const TensorShape& inShape = inputs[0].shape();
    const TensorShape& filterShape = inputs[1].shape();
    const TensorShape& outShape = outputs[0].shape();

    // NOTE(review): the forward functor takes no input-channel count, so
    // inShape[1] is not read here. A reviewer questioned why it is not
    // needed - confirm against the functor declaration.
    const size_t batchSize = inShape[0];
    const size_t inputHeight = inShape[2];
    const size_t inputWidth = inShape[3];
    const size_t outputChannels = outShape[1];
    const size_t outputHeight = outShape[2];
    const size_t outputWidth = outShape[3];
    const size_t filterHeight = getFilterHeight(filterShape);
    const size_t filterWidth = getFilterWidth(filterShape);

    DepthwiseConvFunctor<Device, real> forwardFunctor;
    forwardFunctor(inputData,
                   filterData,
                   batchSize,
                   outputChannels,
                   outputHeight,
                   outputWidth,
                   inputHeight,
                   inputWidth,
                   filterHeight,
                   filterWidth,
                   strideH(),
                   strideW(),
                   paddingH(),
                   paddingW(),
                   outputData);
  }
};

/*
 * \brief Backward input calculation of depthwise convolution.
 *
 * Argument layout:
 *   inputs[0]  - gradient of the output tensor
 *   inputs[1]  - filter
 *   outputs[0] - gradient of the input tensor (must be an ADD_TO argument)
 *
 * The computation itself is delegated to
 * DepthwiseConvGradInputFunctor<Device, real>.
 */
template <DeviceType Device>
class DepthwiseConvGradInputFunction : public ConvFunctionBase {
public:
  void init(const FuncConfig& config) override {
    ConvFunctionBase::init(config);
  }

  // Validates the (input, filter, output) shapes via checkShape. For this
  // function the input shape comes from outputs[0] and the output shape
  // from inputs[0].
  void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
    checkShape(outputs[0].shape(), inputs[1].shape(), inputs[0].shape());
  }

  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    CHECK_EQ(numInputs_, inputs.size());
    CHECK_EQ(numOutputs_, outputs.size());
    check(inputs, outputs);
    // Only ADD_TO mode is accepted for the input gradient.
    // NOTE(review): presumably the functor accumulates into inputGrad -
    // confirm against the device implementation.
    CHECK_EQ(outputs[0].getArgType(), ADD_TO);

    real* outputGrad = inputs[0].data<real>();
    real* filterData = inputs[1].data<real>();
    real* inputGrad = outputs[0].data<real>();

    const TensorShape& outGradShape = inputs[0].shape();
    const TensorShape& filterShape = inputs[1].shape();
    const TensorShape& inGradShape = outputs[0].shape();

    const size_t batchSize = inGradShape[0];
    const size_t inputChannels = inGradShape[1];
    const size_t inputHeight = inGradShape[2];
    const size_t inputWidth = inGradShape[3];
    const size_t outputChannels = outGradShape[1];
    const size_t outputHeight = outGradShape[2];
    const size_t outputWidth = outGradShape[3];
    const size_t filterHeight = getFilterHeight(filterShape);
    const size_t filterWidth = getFilterWidth(filterShape);

    DepthwiseConvGradInputFunctor<Device, real> gradInputFunctor;
    gradInputFunctor(outputGrad,
                     filterData,
                     batchSize,
                     outputChannels,
                     outputHeight,
                     outputWidth,
                     inputChannels,
                     inputHeight,
                     inputWidth,
                     filterHeight,
                     filterWidth,
                     strideH(),
                     strideW(),
                     paddingH(),
                     paddingW(),
                     inputGrad);
  }
};

/*
 * \brief Backward filter calculation of depthwise convolution.
 *
 * Argument layout:
 *   inputs[0]  - gradient of the output tensor
 *   inputs[1]  - input tensor
 *   outputs[0] - gradient of the filter (must be an ADD_TO argument)
 *
 * A scratch buffer (colData) is obtained from memory_ after resizing it for
 * inputChannels * filterHeight * filterWidth * outputHeight * outputWidth
 * elements, and handed to DepthwiseConvGradFilterFunctor<Device, real>,
 * which performs the actual computation.
 */
template <DeviceType Device>
class DepthwiseConvGradFilterFunction : public ConvFunctionBase {
public:
  void init(const FuncConfig& config) override {
    ConvFunctionBase::init(config);
  }

  // Validates the (input, filter, output) shapes via checkShape. For this
  // function the filter shape comes from outputs[0] and the output shape
  // from inputs[0].
  void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
    const TensorShape& output = inputs[0].shape();
    const TensorShape& input = inputs[1].shape();
    const TensorShape& filter = outputs[0].shape();
    checkShape(input, filter, output);
  }

  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    // Validate the argument counts before indexing inputs/outputs, as the
    // sibling forward and grad-input functions do.
    CHECK_EQ(numInputs_, inputs.size());
    CHECK_EQ(numOutputs_, outputs.size());
    check(inputs, outputs);
    // Only ADD_TO mode is accepted for the filter gradient, matching the
    // check performed by DepthwiseConvGradInputFunction.
    CHECK_EQ(outputs[0].getArgType(), ADD_TO);

    const TensorShape& output = inputs[0].shape();
    const TensorShape& input = inputs[1].shape();
    const TensorShape& filter = outputs[0].shape();

    const size_t batchSize = input[0];
    const size_t inputChannels = input[1];
    const size_t inputHeight = input[2];
    const size_t inputWidth = input[3];
    const size_t filterHeight = getFilterHeight(filter);
    const size_t filterWidth = getFilterWidth(filter);
    const size_t outputChannels = output[1];
    const size_t outputHeight = output[2];
    const size_t outputWidth = output[3];

    real* outputGrad = inputs[0].data<real>();
    real* inputData = inputs[1].data<real>();
    real* filterGrad = outputs[0].data<real>();

    // Keep the element count in size_t: the product of five dimensions can
    // exceed the range of int and must not be truncated before the scratch
    // buffer is resized.
    const size_t colBufferSize =
        inputChannels * filterHeight * filterWidth * outputHeight * outputWidth;
    resizeBuffer<Device>(colBufferSize);
    real* colData = reinterpret_cast<real*>(memory_->getBuf());

    DepthwiseConvGradFilterFunctor<Device, real> depthwiseConvGradFilter;
    depthwiseConvGradFilter(outputGrad,
                            inputData,
                            batchSize,
                            outputChannels,
                            outputHeight,
                            outputWidth,
                            inputChannels,
                            inputHeight,
                            inputWidth,
                            filterHeight,
                            filterWidth,
                            strideH(),
                            strideW(),
                            paddingH(),
                            paddingW(),
                            colData,
                            filterGrad);
  }
};

// Register the three depthwise convolution functions (forward, input
// gradient, filter gradient) for CPU. Note that the CPU functor
// specializations in this file have empty bodies, so the CPU-registered
// functions run their checks but compute nothing.
REGISTER_TYPED_FUNC(DepthwiseConv, CPU, DepthwiseConvFunction);
REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
CPU,
DepthwiseConvGradInputFunction);
REGISTER_TYPED_FUNC(DepthwiseConvGradFilter,
CPU,
DepthwiseConvGradFilterFunction);
// The GPU registrations are compiled only when PADDLE_ONLY_CPU is not
// defined.
#ifndef PADDLE_ONLY_CPU
REGISTER_TYPED_FUNC(DepthwiseConv, GPU, DepthwiseConvFunction);
REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
GPU,
DepthwiseConvGradInputFunction);
REGISTER_TYPED_FUNC(DepthwiseConvGradFilter,
GPU,
DepthwiseConvGradFilterFunction);
#endif

} // namespace paddle
Loading