Commit f1d5fb3

support soft labels.

1 parent a2a0d6f commit f1d5fb3

10 files changed: +74 −87 lines changed
paddle/operators/math/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
@@ -1,12 +1,12 @@
 if(WITH_GPU)
     nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc
                im2col.cu DEPS cblas device_context operator)
-    nv_library(softmax_function SRCS softmax_function.cc softmax_function.cu
+    nv_library(softmax_function SRCS softmax.cc softmax.cu
                DEPS operator)
 else()
     cc_library(math_function SRCS math_function.cc im2col.cc
                DEPS cblas device_context operator)
-    cc_library(softmax_function SRCS softmax_function.cc DEPS operator)
+    cc_library(softmax_function SRCS softmax.cc DEPS operator)
 endif()
 
 nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
paddle/operators/math/softmax.cc (renamed from softmax_function.cc)

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/operators/math/softmax_function.h"
+#include "paddle/operators/math/softmax.h"
 
 namespace paddle {
 namespace operators {
paddle/operators/math/softmax.cu (renamed from softmax_function.cu)

Lines changed: 1 addition & 1 deletion

@@ -14,7 +14,7 @@
 
 #define EIGEN_USE_GPU
 
-#include "paddle/operators/math/softmax_function.h"
+#include "paddle/operators/math/softmax.h"
 
 namespace paddle {
 namespace operators {

paddle/operators/math/utils.h

Lines changed: 0 additions & 42 deletions
This file was deleted.

paddle/operators/softmax_op.h

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 #include "paddle/framework/eigen.h"
 #include "paddle/framework/op_registry.h"
-#include "paddle/operators/math/softmax_function.h"
+#include "paddle/operators/math/softmax.h"
 
 namespace paddle {
 namespace operators {

paddle/operators/softmax_with_cross_entropy_op.cc

Lines changed: 51 additions & 24 deletions
@@ -23,16 +23,32 @@ class SoftmaxWithCrossEntropyOpMaker
   SoftmaxWithCrossEntropyOpMaker(framework::OpProto* proto,
                                  framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
+    // TODO(caoying): replace int with bool.
+    AddAttr<int>("soft_label",
+                 "(int, default 0), A flag to indicate whether to interpret "
+                 "the given labels as soft labels.")
+        .SetDefault(0);
     AddInput("Logits",
-             "The unscaled log probabilities which is a 2-D tensor<float> with"
-             "shape [N x K]. N is the batch_size, and K is the class number.")
+             "(Tensor, default Tensor<float>), The unscaled log probabilities "
+             "which is a 2-D tensor with shape [N x K]. N is the batch_size, "
+             "and K is the class number.")
         .NotInGradient();
-    AddInput("Label", "The ground truth. A 1-D tensor<int> with shape N.");
-    AddOutput("Softmax",
-              "Store the outputs of softmax function, "
-              "which will be used in backward calculation.")
+    AddInput(
+        "Label",
+        "(Tensor, default Tensor<int>), The ground truth which is "
+        "a 1-D or 2-D tensor. "
+        "If soft_label is set to 0, Label is a Tensor<int> with shape [N x 1]. "
+        "If soft_label is set to 1, Label is a Tensor<float/double> "
+        "with shape [N x K].");
+    AddOutput(
+        "Softmax",
+        "(Tensor, default Tensor<float>), A 2-D tensor with shape [N x K]. "
+        "The output values of the softmax activation for the given input "
+        "batch, which will be used in backward calculation.")
         .AsIntermediate();
-    AddOutput("Out", "A 1-D tensor<float> with shape N.");
+    AddOutput("Loss",
+              "(Tensor, default Tensor<float>), A 1-D tensor. The cross "
+              "entropy loss with shape [N x 1].");
     AddComment(R"DOC(
 Cross entropy loss with softmax is used extensively as the output layer. This
 operator computes the softmax normalized values for each row of the input
@@ -46,25 +62,18 @@ which will produce incorrect results.
 This operator expects mutually exclusive hard labels: each sample in a batch
 is in exactly one class with probability 1, i.e. each sample in the batch has
 one and only one label.
-)DOC");
-  }
-};
 
-class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
+Equation:
 
- protected:
-  void InferShape(const framework::InferShapeContext& ctx) const override {
-    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
-                            "Input(Out@Grad) should not be null");
-    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Softmax"),
-                            "Input(Softmax) should be not null.");
-    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"),
-                            "Input(Lable) should be not null.");
+1) hard label (one-hot label)
 
-    ctx.Output<framework::LoDTensor>(framework::GradVarName("Logits"))
-        ->Resize(ctx.Input<Tensor>("Softmax")->dims());
+Loss_j = -\text{Logit}_{Label_j} + \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right), j = 1, ..., N
+
+2) soft label (a distribution over all classes)
+
+Loss_j = -\sum_{i=0}^{K}\text{Label}_i\left(\text{Logit}_i - \log\left(\sum_{i'=0}^{K}\exp(\text{Logit}_{i'})\right)\right), j = 1, ..., N
+
+)DOC");
   }
 };
 
@@ -82,7 +91,25 @@ class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel {
                    "The label should be a 1-d tensor.");
 
     ctx.Output<framework::LoDTensor>("Softmax")->Resize(logits->dims());
-    ctx.Output<framework::LoDTensor>("Out")->Resize({logits->dims()[0], 1});
+    ctx.Output<framework::LoDTensor>("Loss")->Resize({logits->dims()[0], 1});
+  }
+};
+
+class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext& ctx) const override {
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Loss")),
+                            "Input(Loss@Grad) should not be null.");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Softmax"),
+                            "Input(Softmax) should not be null.");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"),
+                            "Input(Label) should not be null.");
+
+    ctx.Output<framework::LoDTensor>(framework::GradVarName("Logits"))
+        ->Resize(ctx.Input<Tensor>("Softmax")->dims());
   }
 };
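As a worked illustration of the two loss formulas documented above, here is a minimal NumPy sketch. It is not part of this commit; the function names (hard_label_loss, soft_label_loss) are illustrative only.

import numpy as np

def softmax(logits):
    # Row-wise softmax with max-subtraction for numerical stability.
    shifted = logits - logits.max(axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=1, keepdims=True)

def hard_label_loss(logits, labels):
    # Loss_j = -Logit_{j, labels_j} + log(sum_i exp(Logit_{j, i}))
    #        = -log(softmax(logits)[j, labels_j])
    probs = softmax(logits)
    rows = np.arange(logits.shape[0])
    return -np.log(probs[rows, labels])

def soft_label_loss(logits, label_dist):
    # Loss_j = -sum_i Label_{j, i} * log(softmax(logits)[j, i])
    probs = softmax(logits)
    return -(label_dist * np.log(probs)).sum(axis=1)

logits = np.array([[1.0, 2.0, 3.0],
                   [2.0, 0.5, 0.5]])
print(hard_label_loss(logits, np.array([2, 0])))
print(soft_label_loss(logits, np.array([[0.1, 0.2, 0.7],
                                        [0.8, 0.1, 0.1]])))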

paddle/operators/softmax_with_cross_entropy_op.cu

Lines changed: 12 additions & 10 deletions
@@ -13,9 +13,10 @@
 limitations under the License. */
 
 #define EIGEN_USE_GPU
+
 #include "paddle/framework/op_registry.h"
-#include "paddle/operators/math/softmax_function.h"
-#include "paddle/operators/math/utils.h"
+#include "paddle/operators/cross_entropy_op.h"
+#include "paddle/operators/math/softmax.h"
 
 namespace paddle {
 namespace operators {
@@ -27,9 +28,10 @@ __global__ void CrossEntropyKernel(T* out, const T* softmax_out,
                                    const int* label, const int batch_size,
                                    const int class_num) {
   int i = blockIdx.x * blockDim.x + threadIdx.x;
-  if (i >= batch_size) return;
-  PADDLE_ASSERT(label[i] >= 0 && label[i] < class_num);
-  out[i] = -math::tolerable_value(log(softmax_out[i * class_num + label[i]]));
+  if (i < batch_size) {
+    PADDLE_ASSERT(label[i] >= 0 && label[i] < class_num);
+    out[i] = -tolerable_value(std::log(softmax_out[i * class_num + label[i]]));
+  }
 }
 
 template <typename T>
@@ -38,10 +40,10 @@ __global__ void CrossEntropyWithSoftmaxGradKernel(T* softmax_out,
                                                   const int batch_size,
                                                   const int class_num) {
   int i = blockIdx.x * blockDim.x + threadIdx.x;
-  if (i >= batch_size) return;
-
-  PADDLE_ASSERT(label[i] >= 0 && label[i] < class_num);
-  softmax_out[i * class_num + label[i]] -= 1.;
+  if (i < batch_size) {
+    PADDLE_ASSERT(label[i] >= 0 && label[i] < class_num);
+    softmax_out[i * class_num + label[i]] -= 1.;
+  }
 }
 
 template <typename T>
@@ -60,7 +62,7 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel {
 
     // Calculate the cross entropy loss based on hard labels.
    const int* label_data = context.Input<Tensor>("Label")->data<int>();
-    Tensor* loss = context.Output<Tensor>("Out");
+    Tensor* loss = context.Output<Tensor>("Loss");
     loss->mutable_data<T>(context.GetPlace());
     T* loss_data = loss->data<T>();
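For reference, the in-place update in CrossEntropyWithSoftmaxGradKernel is the standard softmax-with-cross-entropy gradient, dLoss/dLogits = softmax - one_hot(label). A NumPy sketch of the same computation (hard_label_grad is an illustrative name, not part of this commit):

import numpy as np

def hard_label_grad(softmax_out, labels):
    # dLoss/dLogits = softmax - one_hot(label); the CUDA kernel performs the
    # same update in place with one thread per sample:
    #   softmax_out[i * class_num + label[i]] -= 1.
    grad = softmax_out.copy()
    grad[np.arange(grad.shape[0]), labels] -= 1.0
    return grad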

paddle/operators/softmax_with_cross_entropy_op.h

Lines changed: 4 additions & 4 deletions
@@ -15,8 +15,8 @@
 #pragma once
 #include "paddle/framework/eigen.h"
 #include "paddle/framework/op_registry.h"
-#include "paddle/operators/math/softmax_function.h"
-#include "paddle/operators/math/utils.h"
+#include "paddle/operators/cross_entropy_op.h"
+#include "paddle/operators/math/softmax.h"
 
 namespace paddle {
 namespace operators {
@@ -44,7 +44,7 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel {
     T* softmax_out = softmax->data<T>();
     const int* label_data = context.Input<Tensor>("Label")->data<int>();
 
-    Tensor* loss = context.Output<Tensor>("Out");
+    Tensor* loss = context.Output<Tensor>("Loss");
     loss->mutable_data<T>(context.GetPlace());
     T* loss_data = loss->data<T>();
 
@@ -53,7 +53,7 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel {
 
     for (int i = 0; i < batch_size; ++i) {
       int index = i * class_num + label_data[i];
-      loss_data[i] = -math::tolerable_value(std::log(softmax_out[index]));
+      loss_data[i] = -tolerable_value(std::log(softmax_out[index]));
     }
   }
 };
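With math/utils.h deleted, tolerable_value is now taken from paddle/operators/cross_entropy_op.h. Assuming it keeps its previous role of clamping non-finite values produced by log(0), a Python sketch of that behavior (the clamp constant is illustrative, not Paddle's exact value):

import math

def tolerable_value(x, appro_inf=1e20):
    # Assumption: clamp +/-inf (e.g. log(0.0) = -inf) to a large finite value
    # so per-sample losses stay finite downstream.
    if math.isinf(x):
        return appro_inf if x > 0 else -appro_inf
    return x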

python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py

Lines changed: 2 additions & 2 deletions
@@ -25,13 +25,13 @@ def setUp(self):
             dtype="float32")
 
         self.inputs = {"Logits": logits, "Label": labels}
-        self.outputs = {"Softmax": softmax, "Out": cross_entropy}
+        self.outputs = {"Softmax": softmax, "Loss": cross_entropy}
 
     def test_check_output(self):
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(["Logits"], "Out", max_relative_error=0.05)
+        self.check_grad(["Logits"], "Loss", max_relative_error=0.05)
 
 
 if __name__ == "__main__":
