-
Notifications
You must be signed in to change notification settings - Fork 5.9k
Add model average optimizer for fluid #9082
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
8a64568
d7e5e1f
aee6867
016d0eb
87fe52c
e0b136c
92a01d4
cad4d7f
d22f4de
e01c770
68c9f6e
ad63722
edb4e29
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,184 @@ | ||
| /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. */ | ||
|
|
||
| #include "paddle/fluid/operators/average_accumulates_op.h" | ||
|
|
||
| namespace paddle { | ||
| namespace operators { | ||
|
|
||
| template <> | ||
| void getAccumulators<paddle::platform::CPUDeviceContext>( | ||
| const framework::ExecutionContext& ctx, int64_t& num_updates_, | ||
| int64_t& num_accumulates_, int64_t& old_num_accumulates_) { | ||
| auto* in_old_num_accumulates = ctx.Input<Tensor>("in_old_num_accumulates"); | ||
| auto* in_num_accumulates = ctx.Input<Tensor>("in_num_accumulates"); | ||
| auto* in_num_updates = ctx.Input<Tensor>("in_num_updates"); | ||
|
|
||
| old_num_accumulates_ = in_old_num_accumulates->data<int64_t>()[0]; | ||
| num_accumulates_ = in_num_accumulates->data<int64_t>()[0]; | ||
| num_updates_ = in_num_updates->data<int64_t>()[0]; | ||
| } | ||
|
|
||
| template <> | ||
| void setAccumulators<paddle::platform::CPUDeviceContext>( | ||
| const framework::ExecutionContext& ctx, int64_t num_updates_, | ||
| int64_t num_accumulates_, int64_t old_num_accumulates_) { | ||
| auto* out_old_num_accumulates = ctx.Output<Tensor>("out_old_num_accumulates"); | ||
| auto* out_num_accumulates = ctx.Output<Tensor>("out_num_accumulates"); | ||
| auto* out_num_updates = ctx.Output<Tensor>("out_num_updates"); | ||
|
|
||
| out_old_num_accumulates->data<int64_t>()[0] = old_num_accumulates_; | ||
| out_num_accumulates->data<int64_t>()[0] = num_accumulates_; | ||
| out_num_updates->data<int64_t>()[0] = num_updates_; | ||
| } | ||
|
|
||
| class AverageAccumulatesOp : public framework::OperatorWithKernel { | ||
| public: | ||
| using framework::OperatorWithKernel::OperatorWithKernel; | ||
|
|
||
| void InferShape(framework::InferShapeContext* ctx) const override { | ||
| PADDLE_ENFORCE( | ||
| ctx->HasInput("param"), | ||
| "Input (param) of average_accumulates op should not be null."); | ||
| PADDLE_ENFORCE( | ||
| ctx->HasInput("in_sum_1"), | ||
| "Input (sum_1) of average_accumulates op should not be null."); | ||
| PADDLE_ENFORCE( | ||
| ctx->HasInput("in_sum_2"), | ||
| "Input (sum_2) of average_accumulates op should not be null."); | ||
| PADDLE_ENFORCE( | ||
| ctx->HasInput("in_sum_3"), | ||
| "Input (sum_3) of average_accumulates op should not be null."); | ||
| PADDLE_ENFORCE( | ||
| ctx->HasInput("in_num_accumulates"), | ||
| "Input (in_num_accumulates) of average_accumulates op should " | ||
| "not be null."); | ||
| PADDLE_ENFORCE(ctx->HasInput("in_old_num_accumulates"), | ||
| "Input (old_num_accumulates) of average_accumulates op " | ||
| "should not be null."); | ||
| PADDLE_ENFORCE( | ||
| ctx->HasInput("in_num_updates"), | ||
| "Input (num_updates) of average_accumulates op should not be null."); | ||
|
|
||
| PADDLE_ENFORCE( | ||
| ctx->HasOutput("out_sum_1"), | ||
| "Output (sum_1) of average_accumulates op should not be null."); | ||
| PADDLE_ENFORCE( | ||
| ctx->HasOutput("out_sum_2"), | ||
| "Output (sum_2) of average_accumulates op should not be null."); | ||
| PADDLE_ENFORCE( | ||
| ctx->HasOutput("out_sum_3"), | ||
| "Output (sum_3) of average_accumulates op should not be null."); | ||
| PADDLE_ENFORCE(ctx->HasOutput("out_num_accumulates"), | ||
| "Output (num_accumulates) of average_accumulates op should " | ||
| "not be null."); | ||
| PADDLE_ENFORCE(ctx->HasOutput("out_old_num_accumulates"), | ||
| "Output (old_num_accumulates) of average_accumulates op " | ||
| "should not be null."); | ||
| PADDLE_ENFORCE( | ||
| ctx->HasOutput("out_num_updates"), | ||
| "Output (num_updates) of average_accumulates op should not be null."); | ||
|
|
||
| auto in_dim = ctx->GetInputDim("param"); | ||
|
|
||
| ctx->SetOutputDim("out_sum_1", in_dim); | ||
| ctx->SetOutputDim("out_sum_2", in_dim); | ||
| ctx->SetOutputDim("out_sum_3", in_dim); | ||
| ctx->SetOutputDim("out_num_accumulates", {1}); | ||
| ctx->SetOutputDim("out_old_num_accumulates", {1}); | ||
| ctx->SetOutputDim("out_num_updates", {1}); | ||
| } | ||
|
|
||
| protected: | ||
| framework::OpKernelType GetExpectedKernelType( | ||
| const framework::ExecutionContext& ctx) const override { | ||
| return framework::OpKernelType( | ||
| framework::ToDataType(ctx.Input<Tensor>("param")->type()), | ||
| ctx.GetPlace()); | ||
| } | ||
| }; | ||
|
|
||
| class AverageAccumulatesOpMaker : public framework::OpProtoAndCheckerMaker { | ||
| public: | ||
| AverageAccumulatesOpMaker(OpProto* proto, OpAttrChecker* op_checker) | ||
| : OpProtoAndCheckerMaker(proto, op_checker) { | ||
| AddInput("param", | ||
| "Input(Tensor or LoDTensor): The parameter to be accumulated."); | ||
| AddInput("in_sum_1", | ||
| "Input(Tensor or LoDTensor): A tensor used to store the parameter " | ||
|
||
| "sums with the same shape as input(param)."); | ||
| AddInput("in_sum_2", | ||
| "Input(Tensor or LoDTensor): A auxiliary tensor to help " | ||
| "accumulating sums of parameter values with the same shape as " | ||
| "input(param). It is used to avoid loss of precision due to too " | ||
| "many sums."); | ||
| AddInput("in_sum_3", | ||
| "Input(Tensor or LoDTensor): A auxiliary tensor to help " | ||
| "accumulating sums of parameter values with the same shape as " | ||
| "input(param)."); | ||
| AddInput("in_num_accumulates", | ||
| "Input(Tensor): The accumulating times of current window with " | ||
|
||
| "shape [1]."); | ||
| AddInput("in_old_num_accumulates", | ||
| "Input(Tensor): The accumulating times of previous window with " | ||
| "shape [1]."); | ||
| AddInput("in_num_updates", | ||
| "Input(Tensor): The total number of batches used by trainning " | ||
| "before this batch with shape [1]."); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in_num_accumulates 这3个标量用fill_constant初始化的时候可以用
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 明白了,那就现在这样吧。觉得更好的是,支持Variable<int/float>这样的变量作为op的输入。 |
||
|
|
||
| AddOutput("out_sum_1", | ||
| "Output(Tensor or LoDTensor): A tensor used to store the " | ||
| "parameter sums with the same shape as input(param)."); | ||
| AddOutput("out_sum_2", | ||
| "Output(Tensor or LoDTensor): A auxiliary tensor to help " | ||
| "accumulating sums of parameter values with the same shape as " | ||
| "input(param). It is used to avoid loss of precision due to too " | ||
| "many sums."); | ||
| AddOutput("out_sum_3", | ||
| "Output(Tensor or LoDTensor): A auxiliary tensor to help " | ||
| "accumulating sums of parameter values with the same shape as " | ||
| "input(param)."); | ||
| AddOutput("out_num_accumulates", | ||
| "Output(Tensor): The accumulating times of current window with " | ||
| "shape [1]."); | ||
| AddOutput("out_old_num_accumulates", | ||
| "Output(Tensor): The accumulating times of previous window with " | ||
| "shape [1]."); | ||
| AddOutput("out_num_updates", | ||
| "Output(Tensor): The total number of batches used by trainning " | ||
| "before this batch with shape [1]."); | ||
|
|
||
| AddAttr<float>("average_window", | ||
| "The rate of average window size relative to num_updates."); | ||
|
||
| AddAttr<int64_t>("max_average_window", "Maximum size of average window."); | ||
|
||
| AddAttr<int64_t>("min_average_window", "Minimu size of average window."); | ||
|
||
|
|
||
| AddComment(R"DOC( | ||
| AverageAccumulates Operator. | ||
| Accumulate the sum of the parameter within a sliding window. The size of the sliding window is determined by 'average_window', 'max_average_window' and 'min_average_window'. | ||
|
||
| )DOC"); | ||
| } | ||
| }; | ||
|
|
||
| } // namespace operators | ||
| } // namespace paddle | ||
|
|
||
| namespace ops = paddle::operators; | ||
| REGISTER_OPERATOR(average_accumulates, ops::AverageAccumulatesOp, | ||
| ops::AverageAccumulatesOpMaker, | ||
| paddle::framework::EmptyGradOpMaker); | ||
| REGISTER_OP_CPU_KERNEL( | ||
| average_accumulates, | ||
| ops::AverageAccumulatesKernel<paddle::platform::CPUDeviceContext, float>, | ||
| ops::AverageAccumulatesKernel<paddle::platform::CPUDeviceContext, double>); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,63 @@ | ||
| /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. */ | ||
|
|
||
| #include "paddle/fluid/operators/average_accumulates_op.h" | ||
| #include "paddle/fluid/platform/gpu_info.h" | ||
|
|
||
| namespace paddle { | ||
| namespace operators { | ||
| template <> | ||
| void getAccumulators<paddle::platform::CUDADeviceContext>( | ||
| const framework::ExecutionContext& ctx, int64_t& num_updates_, | ||
| int64_t& num_accumulates_, int64_t& old_num_accumulates_) { | ||
| auto* in_old_num_accumulates = ctx.Input<Tensor>("in_old_num_accumulates"); | ||
| auto* in_num_accumulates = ctx.Input<Tensor>("in_num_accumulates"); | ||
| auto* in_num_updates = ctx.Input<Tensor>("in_num_updates"); | ||
| auto stream = ctx.cuda_device_context().stream(); | ||
| memory::Copy(platform::CPUPlace(), &old_num_accumulates_, | ||
| platform::CUDAPlace(), in_old_num_accumulates->data<int64_t>(), | ||
| sizeof(int64_t), stream); | ||
| memory::Copy(platform::CPUPlace(), &num_accumulates_, platform::CUDAPlace(), | ||
| in_num_accumulates->data<int64_t>(), sizeof(int64_t), stream); | ||
| memory::Copy(platform::CPUPlace(), &num_updates_, platform::CUDAPlace(), | ||
| in_num_updates->data<int64_t>(), sizeof(int64_t), stream); | ||
| } | ||
|
|
||
| template <> | ||
| void setAccumulators<paddle::platform::CUDADeviceContext>( | ||
| const framework::ExecutionContext& ctx, int64_t num_updates_, | ||
| int64_t num_accumulates_, int64_t old_num_accumulates_) { | ||
| auto stream = ctx.cuda_device_context().stream(); | ||
| auto* out_old_num_accumulates = ctx.Output<Tensor>("out_old_num_accumulates"); | ||
| auto* out_num_accumulates = ctx.Output<Tensor>("out_num_accumulates"); | ||
| auto* out_num_updates = ctx.Output<Tensor>("out_num_updates"); | ||
|
|
||
| memory::Copy(platform::CUDAPlace(), out_old_num_accumulates->data<int64_t>(), | ||
| platform::CPUPlace(), &old_num_accumulates_, sizeof(int64_t), | ||
| stream); | ||
| memory::Copy(platform::CUDAPlace(), out_num_accumulates->data<int64_t>(), | ||
| platform::CPUPlace(), &num_accumulates_, sizeof(int64_t), | ||
| stream); | ||
| memory::Copy(platform::CUDAPlace(), out_num_updates->data<int64_t>(), | ||
| platform::CPUPlace(), &num_updates_, sizeof(int64_t), stream); | ||
| } | ||
|
|
||
| } // namespace operators | ||
| } // namespace paddle | ||
|
|
||
| namespace ops = paddle::operators; | ||
| REGISTER_OP_CUDA_KERNEL( | ||
| average_accumulates, | ||
| ops::AverageAccumulatesKernel<paddle::platform::CUDADeviceContext, float>, | ||
| ops::AverageAccumulatesKernel<paddle::platform::CUDADeviceContext, double>); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,110 @@ | ||
| /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. */ | ||
|
|
||
| #pragma once | ||
| #include <algorithm> | ||
| #include "paddle/fluid/framework/eigen.h" | ||
| #include "paddle/fluid/framework/op_registry.h" | ||
| #include "paddle/fluid/operators/math/math_function.h" | ||
|
|
||
| namespace paddle { | ||
| namespace operators { | ||
|
|
||
| using Tensor = framework::Tensor; | ||
|
|
||
| template <typename T, int MajorType = Eigen::RowMajor, | ||
| typename IndexType = Eigen::DenseIndex> | ||
| using EigenVector = framework::EigenVector<T, MajorType, IndexType>; | ||
|
|
||
| template <typename DeviceContext> | ||
| void getAccumulators(const framework::ExecutionContext& ctx, | ||
|
||
| int64_t& num_updates, int64_t& num_accumulates, | ||
| int64_t& old_num_accumulates); | ||
|
|
||
| template <typename DeviceContext> | ||
| void setAccumulators(const framework::ExecutionContext& ctx, | ||
|
||
| int64_t num_updates, int64_t num_accumulates, | ||
| int64_t old_num_accumulates); | ||
|
|
||
| template <typename DeviceContext, typename T> | ||
| class AverageAccumulatesKernel : public framework::OpKernel<T> { | ||
| public: | ||
| void Compute(const framework::ExecutionContext& ctx) const override { | ||
| // It is used to avoid loss of precision | ||
| static const int64_t kMaxNumAccumulates = 16384; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there any reference paper for kMaxNumAccumulates
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems that |
||
| // Get accumulators from input | ||
| int64_t num_updates = 0; | ||
| int64_t num_accumulates = 0; | ||
| int64_t old_num_accumulates = 0; | ||
| getAccumulators<DeviceContext>(ctx, num_updates, num_accumulates, | ||
| old_num_accumulates); | ||
|
|
||
| // Get attrs | ||
| float average_window = ctx.Attr<float>("average_window"); | ||
| int64_t max_average_window = ctx.Attr<int64_t>("max_average_window"); | ||
| int64_t min_average_window = ctx.Attr<int64_t>("min_average_window"); | ||
| min_average_window = | ||
| std::min<int64_t>(min_average_window, max_average_window); | ||
|
|
||
| // Get inputs | ||
| auto* param = ctx.Input<Tensor>("param"); | ||
| auto* in_sum_1 = ctx.Input<Tensor>("in_sum_1"); | ||
| auto* in_sum_2 = ctx.Input<Tensor>("in_sum_2"); | ||
| auto* in_sum_3 = ctx.Input<Tensor>("in_sum_3"); | ||
| auto param_tensor = EigenVector<T>::Flatten(*param); | ||
| auto in_sum_1_tensor = EigenVector<T>::Flatten(*in_sum_1); | ||
| auto in_sum_2_tensor = EigenVector<T>::Flatten(*in_sum_2); | ||
| auto in_sum_3_tensor = EigenVector<T>::Flatten(*in_sum_3); | ||
|
|
||
| // Get outputs | ||
| auto* out_sum_1 = ctx.Output<Tensor>("out_sum_1"); | ||
| auto* out_sum_2 = ctx.Output<Tensor>("out_sum_2"); | ||
| auto* out_sum_3 = ctx.Output<Tensor>("out_sum_3"); | ||
| auto out_sum_1_tensor = EigenVector<T>::Flatten(*out_sum_1); | ||
| auto out_sum_2_tensor = EigenVector<T>::Flatten(*out_sum_2); | ||
| auto out_sum_3_tensor = EigenVector<T>::Flatten(*out_sum_3); | ||
|
|
||
| // Compute | ||
| auto& place = *ctx.template device_context<DeviceContext>().eigen_device(); | ||
| math::SetConstant<DeviceContext, T> constant_functor; | ||
| ++num_updates; | ||
| ++num_accumulates; | ||
| out_sum_1_tensor.device(place) = in_sum_1_tensor + param_tensor; | ||
| out_sum_2_tensor.device(place) = in_sum_2_tensor; | ||
| out_sum_3_tensor.device(place) = in_sum_3_tensor; | ||
| if (num_updates % kMaxNumAccumulates == 0) { | ||
| out_sum_2_tensor.device(place) = in_sum_2_tensor + in_sum_1_tensor; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add comments before lin 87:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
| constant_functor(ctx.template device_context<DeviceContext>(), out_sum_1, | ||
| 0.0); | ||
| } | ||
| if (num_accumulates >= min_average_window && | ||
| num_accumulates >= std::min<int64_t>(max_average_window, | ||
| num_updates * average_window)) { | ||
| out_sum_3_tensor.device(place) = in_sum_1_tensor + in_sum_2_tensor; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add comments before line 94:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
| constant_functor(ctx.template device_context<DeviceContext>(), out_sum_1, | ||
| 0.0); | ||
| constant_functor(ctx.template device_context<DeviceContext>(), out_sum_2, | ||
| 0.0); | ||
| old_num_accumulates = num_accumulates; | ||
| num_accumulates = 0; | ||
| } | ||
|
|
||
| // Set accumulators to output | ||
| setAccumulators<DeviceContext>(ctx, num_updates, num_accumulates, | ||
| old_num_accumulates); | ||
| } | ||
| }; | ||
|
|
||
| } // namespace operators | ||
| } // namespace paddle | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Input(Tensor or LoDTensor) -> (Tensor or LoDTensor)
There is no Input before (
https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/operators/mul_op.cc#L79
The same as below.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.