-
Notifications
You must be signed in to change notification settings - Fork 5.9k
Mobilenet gpu implementation #2776
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
211f83f
eeb17c2
efae51c
f4e7ae5
36e7800
064dc88
198164a
a3ce6aa
e92f002
fd4b113
433935a
2bc08f8
ccd46d1
030a3db
fc8aedb
c43f693
6267312
02e04b4
11588b3
d43fbba
44927bf
dbb6588
66520af
d50c71f
f7390d1
21ab0eb
77ff97a
8199886
1f516fa
bd54eb9
4d6be97
5b07d4e
248149f
d5b0c57
cfd4c05
e8d171b
6c528cb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,295 @@ | ||
| /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. */ | ||
|
|
||
| #include "DepthwiseConvOp.h" | ||
| #include "ConvOp.h" | ||
| #include "GemmFunctor.h" | ||
| //#include "paddle/math/MemoryHandle.h" | ||
|
|
||
| namespace paddle { | ||
|
|
||
| template <class T> | ||
| class DepthwiseConvFunctor<DEVICE_TYPE_CPU, T> { | ||
| public: | ||
| void operator()(const T* inputData, | ||
| const T* filterData, | ||
| int batchSize, | ||
| int outputChannels, | ||
| int outputHeight, | ||
| int outputWidth, | ||
| int inputHeight, | ||
| int inputWidth, | ||
| int filterHeight, | ||
| int filterWidth, | ||
| int strideH, | ||
| int strideW, | ||
| int paddingH, | ||
| int paddingW, | ||
| T* outputData) { | ||
| // TODO(zhaolong) : cpu implementation of depthwise convolution | ||
| } | ||
| }; | ||
|
|
||
| template <class T> | ||
| class DepthwiseConvGradInputFunctor<DEVICE_TYPE_CPU, T> { | ||
| public: | ||
| void operator()(const T* outputGrad, | ||
| const T* filterData, | ||
| int batchSize, | ||
| int outputChannels, | ||
| int outputHeight, | ||
| int outputWidth, | ||
| int inputChannels, | ||
| int inputHeight, | ||
| int inputWidth, | ||
| int filterHeight, | ||
| int filterWidth, | ||
| int strideH, | ||
| int strideW, | ||
| int paddingH, | ||
| int paddingW, | ||
| T* inputGrad) {} | ||
| }; | ||
|
|
||
| template <class T> | ||
| class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_CPU, T> { | ||
| public: | ||
| void operator()(const T* outputGrad, | ||
| const T* inputData, | ||
| int batchSize, | ||
| int outputChannels, | ||
| int outputHeight, | ||
| int outputWidth, | ||
| int inputChannels, | ||
| int inputHeight, | ||
| int inputWidth, | ||
| int filterHeight, | ||
| int filterWidth, | ||
| int strideH, | ||
| int strideW, | ||
| int paddingH, | ||
| int paddingW, | ||
| T* colData, | ||
| T* filterGrad) {} | ||
| }; | ||
|
|
||
| /* | ||
| * \brief Forward calculation of depthwise convolution. | ||
| */ | ||
| template <DeviceType Device> | ||
| class DepthwiseConvFunction : public ConvFunctionBase { | ||
| public: | ||
| void init(const FuncConfig& config) override { | ||
| ConvFunctionBase::init(config); | ||
| } | ||
|
|
||
| virtual void check(const BufferArgs& inputs, | ||
| const BufferArgs& outputs) override { | ||
| const TensorShape& input = inputs[0].shape(); | ||
| const TensorShape& filter = inputs[1].shape(); | ||
| const TensorShape& output = outputs[0].shape(); | ||
| checkShape(input, filter, output); | ||
| } | ||
|
|
||
| void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 需要增加
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
| CHECK_EQ(numInputs_, inputs.size()); | ||
| CHECK_EQ(numOutputs_, outputs.size()); | ||
| check(inputs, outputs); | ||
|
|
||
| const TensorShape& input = inputs[0].shape(); | ||
| const TensorShape& filter = inputs[1].shape(); | ||
| const TensorShape& output = outputs[0].shape(); | ||
|
|
||
| size_t batchSize = input[0]; | ||
| // size_t inputChannels = input[1]; | ||
|
||
| size_t inputHeight = input[2]; | ||
| size_t inputWidth = input[3]; | ||
| size_t filterHeight = getFilterHeight(filter); | ||
| size_t filterWidth = getFilterWidth(filter); | ||
| size_t outputChannels = output[1]; | ||
| size_t outputHeight = output[2]; | ||
| size_t outputWidth = output[3]; | ||
|
|
||
| real* inputData = inputs[0].data<real>(); | ||
| real* filterData = inputs[1].data<real>(); | ||
| real* outputData = outputs[0].data<real>(); | ||
|
|
||
| DepthwiseConvFunctor<Device, real> depthwiseConv; | ||
| depthwiseConv(inputData, | ||
| filterData, | ||
| batchSize, | ||
| outputChannels, | ||
| outputHeight, | ||
| outputWidth, | ||
| inputHeight, | ||
| inputWidth, | ||
| filterHeight, | ||
| filterWidth, | ||
| strideH(), | ||
| strideW(), | ||
| paddingH(), | ||
| paddingW(), | ||
| outputData); | ||
| } | ||
| }; | ||
|
|
||
| /* | ||
| * \brief Backward input calculation of depthwise convolution. | ||
| */ | ||
| template <DeviceType Device> | ||
| class DepthwiseConvGradInputFunction : public ConvFunctionBase { | ||
| public: | ||
| void init(const FuncConfig& config) override { | ||
| ConvFunctionBase::init(config); | ||
| } | ||
|
|
||
| virtual void check(const BufferArgs& inputs, | ||
| const BufferArgs& outputs) override { | ||
| const TensorShape& output = inputs[0].shape(); | ||
| const TensorShape& filter = inputs[1].shape(); | ||
| const TensorShape& input = outputs[0].shape(); | ||
| checkShape(input, filter, output); | ||
| } | ||
|
|
||
| void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { | ||
| CHECK_EQ(numInputs_, inputs.size()); | ||
| CHECK_EQ(numOutputs_, outputs.size()); | ||
| check(inputs, outputs); | ||
| // Since the implementation of Col2ImFunctor is ADD_TO, | ||
|
||
| // this function only supports ADD_TO mode. | ||
| CHECK_EQ(outputs[0].getArgType(), ADD_TO); | ||
| const TensorShape& output = inputs[0].shape(); | ||
| const TensorShape& filter = inputs[1].shape(); | ||
| const TensorShape& input = outputs[0].shape(); | ||
|
|
||
| size_t batchSize = input[0]; | ||
| size_t inputChannels = input[1]; | ||
| size_t inputHeight = input[2]; | ||
| size_t inputWidth = input[3]; | ||
| size_t filterHeight = getFilterHeight(filter); | ||
| size_t filterWidth = getFilterWidth(filter); | ||
| size_t outputChannels = output[1]; | ||
| size_t outputHeight = output[2]; | ||
| size_t outputWidth = output[3]; | ||
|
|
||
| real* outputGrad = inputs[0].data<real>(); | ||
| real* filterData = inputs[1].data<real>(); | ||
| real* inputGrad = outputs[0].data<real>(); | ||
|
|
||
| DepthwiseConvGradInputFunctor<Device, real> depthwiseConvGradInput; | ||
| depthwiseConvGradInput(outputGrad, | ||
| filterData, | ||
| batchSize, | ||
| outputChannels, | ||
| outputHeight, | ||
| outputWidth, | ||
| inputChannels, | ||
| inputHeight, | ||
| inputWidth, | ||
| filterHeight, | ||
| filterWidth, | ||
| strideH(), | ||
| strideW(), | ||
| paddingH(), | ||
| paddingW(), | ||
| inputGrad); | ||
| } | ||
| }; | ||
|
|
||
| /* | ||
| * \brief Backward filter calculation of depthwise convolution. | ||
| */ | ||
| template <DeviceType Device> | ||
| class DepthwiseConvGradFilterFunction : public ConvFunctionBase { | ||
| public: | ||
| void init(const FuncConfig& config) override { | ||
| ConvFunctionBase::init(config); | ||
| } | ||
|
|
||
| virtual void check(const BufferArgs& inputs, | ||
| const BufferArgs& outputs) override { | ||
| const TensorShape& output = inputs[0].shape(); | ||
| const TensorShape& input = inputs[1].shape(); | ||
| const TensorShape& filter = outputs[0].shape(); | ||
| checkShape(input, filter, output); | ||
| } | ||
|
|
||
| void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这里面需要增加
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
| // CHECK_EQ(numInputs_, inputs.size()); | ||
|
||
| // CHECK_EQ(numOutputs_, outputs.size()); | ||
| check(inputs, outputs); | ||
| const TensorShape& output = inputs[0].shape(); | ||
| const TensorShape& input = inputs[1].shape(); | ||
| // const TensorShape& multiplier = inputs[2].shape(); | ||
|
||
| const TensorShape& filter = outputs[0].shape(); | ||
|
|
||
| size_t batchSize = input[0]; | ||
| size_t inputChannels = input[1]; | ||
| size_t inputHeight = input[2]; | ||
| size_t inputWidth = input[3]; | ||
| size_t filterHeight = getFilterHeight(filter); | ||
| size_t filterWidth = getFilterWidth(filter); | ||
| size_t outputChannels = output[1]; | ||
| size_t outputHeight = output[2]; | ||
| size_t outputWidth = output[3]; | ||
|
|
||
| real* outputGrad = inputs[0].data<real>(); | ||
| real* inputData = inputs[1].data<real>(); | ||
| real* filterGrad = outputs[0].data<real>(); | ||
|
|
||
| int size = | ||
| inputChannels * filterHeight * filterWidth * outputHeight * outputWidth; | ||
| resizeBuffer<Device>(size); | ||
| real* colData = reinterpret_cast<real*>(memory_->getBuf()); | ||
|
|
||
| DepthwiseConvGradFilterFunctor<Device, real> depthwiseConvGradFilter; | ||
|
|
||
| depthwiseConvGradFilter(outputGrad, | ||
| inputData, | ||
| batchSize, | ||
| outputChannels, | ||
| outputHeight, | ||
| outputWidth, | ||
| inputChannels, | ||
| inputHeight, | ||
| inputWidth, | ||
| filterHeight, | ||
| filterWidth, | ||
| strideH(), | ||
| strideW(), | ||
| paddingH(), | ||
| paddingW(), | ||
| colData, | ||
| filterGrad); | ||
| } | ||
| }; | ||
|
|
||
| REGISTER_TYPED_FUNC(DepthwiseConv, CPU, DepthwiseConvFunction); | ||
| REGISTER_TYPED_FUNC(DepthwiseConvGradInput, | ||
| CPU, | ||
| DepthwiseConvGradInputFunction); | ||
| REGISTER_TYPED_FUNC(DepthwiseConvGradFilter, | ||
| CPU, | ||
| DepthwiseConvGradFilterFunction); | ||
| #ifndef PADDLE_ONLY_CPU | ||
| REGISTER_TYPED_FUNC(DepthwiseConv, GPU, DepthwiseConvFunction); | ||
| REGISTER_TYPED_FUNC(DepthwiseConvGradInput, | ||
| GPU, | ||
| DepthwiseConvGradInputFunction); | ||
| REGISTER_TYPED_FUNC(DepthwiseConvGradFilter, | ||
| GPU, | ||
| DepthwiseConvGradFilterFunction); | ||
| #endif | ||
|
|
||
| } // namespace paddle | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
去掉无用的代码。