Commit 74e0eb7

make float16 a pod type (#8456)
1 parent 74404fa commit 74e0eb7

4 files changed, +99 −14 lines

paddle/fluid/framework/tensor_impl.h

Lines changed: 4 additions & 1 deletion
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/float16.h"
 
 namespace paddle {
 namespace framework {
@@ -52,7 +53,9 @@ struct SizeOfTypeFunctor<HEAD, TAIL...> {
 };
 
 static inline size_t SizeOfType(std::type_index type) {
-  SizeOfTypeFunctor<int, float, double, int16_t, int64_t, bool, size_t> functor;
+  SizeOfTypeFunctor<int, float, double, int16_t, int64_t, bool, size_t,
+                    platform::float16>
+      functor;
   size_t size = functor(type);
   PADDLE_ENFORCE(size != 0UL, "Cannot get size of type %s", type.name());
   return size;
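
SizeOfType resolves a runtime std::type_index to a byte size by walking a compile-time type list, so float16 support only requires appending platform::float16 to that list. Below is a minimal standalone sketch of this dispatch pattern; SizeOfTypeFunctorSketch and the surrounding code are illustrative, not the exact Paddle implementation.

#include <cstddef>
#include <cstdio>
#include <typeindex>
#include <typeinfo>

// Empty type list: the queried type is not registered, report size 0.
template <typename... T>
struct SizeOfTypeFunctorSketch {
  size_t operator()(std::type_index) const { return 0UL; }
};

// Non-empty list: return sizeof(HEAD) on a match, otherwise recurse on TAIL.
template <typename HEAD, typename... TAIL>
struct SizeOfTypeFunctorSketch<HEAD, TAIL...> {
  size_t operator()(std::type_index type) const {
    if (type == std::type_index(typeid(HEAD))) {
      return sizeof(HEAD);
    }
    return SizeOfTypeFunctorSketch<TAIL...>()(type);
  }
};

int main() {
  SizeOfTypeFunctorSketch<int, float, double> functor;
  std::printf("sizeof(float) = %zu\n", functor(std::type_index(typeid(float))));  // 4
  std::printf("unregistered  = %zu\n", functor(std::type_index(typeid(char))));   // 0
  return 0;
}

A size of 0 means "type not in the list", which is exactly the condition the PADDLE_ENFORCE check above turns into an error.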

paddle/fluid/platform/float16.h

Lines changed: 35 additions & 8 deletions
@@ -62,6 +62,7 @@ limitations under the License. */
 #define PADDLE_ALIGN(x) __attribute__((aligned(x)))
 
 namespace paddle {
+namespace platform {
 
 // Use PADDLE_ALIGNED(2) to ensure that each float16 will be allocated
 // and aligned at least on a 2-byte boundary, which leads to efficient
@@ -71,11 +72,21 @@ struct PADDLE_ALIGN(2) float16 {
  public:
   uint16_t x;
 
-  // Constructors
-  HOSTDEVICE inline float16() : x(0) {}
+  // The following defaulted special class member functions
+  // are added to make float16 pass the std::is_trivial test
+  HOSTDEVICE inline float16() = default;
 
-  HOSTDEVICE inline float16(const float16& h) : x(h.x) {}
+  HOSTDEVICE inline float16(const float16&) = default;
 
+  HOSTDEVICE inline float16& operator=(const float16&) = default;
+
+  HOSTDEVICE inline float16(float16&&) = default;
+
+  HOSTDEVICE inline float16& operator=(float16&&) = default;
+
+  HOSTDEVICE inline ~float16() = default;
+
+  // Constructors
 #ifdef PADDLE_CUDA_FP16
   HOSTDEVICE inline explicit float16(const half& h) {
 #if CUDA_VERSION >= 9000
@@ -136,11 +147,6 @@ struct PADDLE_ALIGN(2) float16 {
   HOSTDEVICE inline explicit float16(const T& val)
       : x(float16(static_cast<float>(val)).x) {}
 
-  HOSTDEVICE inline float16& operator=(const float16& rhs) {
-    x = rhs.x;
-    return *this;
-  }
-
   // Assignment operators
 #ifdef PADDLE_CUDA_FP16
   HOSTDEVICE inline float16& operator=(const half& rhs) {
@@ -727,4 +733,25 @@ HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
   return float(a) >= float(b);
 }
 #endif
+
+}  // namespace platform
 }  // namespace paddle
+
+namespace std {
+
+// Override the std::is_pod::value for float16
+// The reason is that different compilers implemented std::is_pod based on
+// different C++ standards. float16 class is a plain old data in C++11 given
+// that it is both trivial and standard_layout.
+// However, std::is_pod in nvcc 8.0 host c++ compiler follows C++0x and is
+// more restricted in that you cannot provide any customized
+// constructor in float16. Hence, we override is_pod here following C++11
+// so that .cu files can be successfully compiled by nvcc.
+template <>
+struct is_pod<paddle::platform::float16> {
+  static const bool value =
+      is_trivial<paddle::platform::float16>::value &&
+      is_standard_layout<paddle::platform::float16>::value;
+};
+
+}  // namespace std
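
The defaulted special member functions above are what make float16 trivial; since its only data is a single public uint16_t with no bases or virtual functions, it is also standard-layout, and trivial plus standard-layout is exactly the C++11 POD definition that the std::is_pod specialization spells out for older nvcc host compilers. Below is a minimal sketch using a hypothetical stand-in struct fp16_pod (not Paddle code) to show how those traits line up.

#include <cstring>
#include <type_traits>

// Hypothetical stand-in for float16: one public 2-byte member, every special
// member function defaulted, no base classes, no virtual functions.
struct alignas(2) fp16_pod {
  unsigned short x;

  fp16_pod() = default;
  fp16_pod(const fp16_pod&) = default;
  fp16_pod& operator=(const fp16_pod&) = default;
  fp16_pod(fp16_pod&&) = default;
  fp16_pod& operator=(fp16_pod&&) = default;
  ~fp16_pod() = default;
};

// Trivial: every special member function is defaulted.
static_assert(std::is_trivial<fp16_pod>::value, "fp16_pod must be trivial");
// Standard layout: all members public, no bases, no virtuals.
static_assert(std::is_standard_layout<fp16_pod>::value,
              "fp16_pod must be standard layout");
// Trivial + standard layout is the C++11 POD notion that the
// std::is_pod<float16> specialization above encodes.

int main() {
  // A POD can be copied byte-for-byte with memcpy.
  fp16_pod a;
  a.x = 0x3c00;  // IEEE 754 binary16 bit pattern of 1.0f
  fp16_pod b;
  std::memcpy(&b, &a, sizeof(fp16_pod));
  return (b.x == a.x) ? 0 : 1;
}

This byte-copyability is presumably what the new LoDTensor tests below rely on when they write float16 values into raw tensor memory and copy tensors across devices.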

paddle/fluid/platform/float16_test.cc

Lines changed: 27 additions & 5 deletions
@@ -10,10 +10,13 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/platform/float16.h"
+#include "paddle/fluid/framework/init.h"
+#include "paddle/fluid/framework/lod_tensor.h"
 
 #include <gtest/gtest.h>
 
 namespace paddle {
+namespace platform {
 
 TEST(float16, conversion_cpu) {
   // Explicit conversion from Eigen::half
@@ -54,13 +57,9 @@ TEST(float16, conversion_cpu) {
   EXPECT_EQ(float16(true).x, 0x3c00);
   EXPECT_EQ(float16(false).x, 0x0000);
 
-  // Default constructor
-  float16 v_def;
-  EXPECT_EQ(v_def.x, 0x0000);
-
   // Assignment operator
   float16 v_assign;
-  v_assign = v_def;
+  v_assign = float16(0);
   EXPECT_EQ(v_assign.x, 0x0000);
   v_assign = Eigen::half(1.0f);
   EXPECT_EQ(v_assign.x, 0x3c00);
@@ -116,4 +115,27 @@ TEST(float16, comparison_cpu) {
   EXPECT_FALSE(float16(-0.0f) > float16(0.0f));
 }
 
+TEST(float16, lod_tensor_cpu) {
+  framework::LoDTensor lod_tensor;
+
+  std::vector<float16> input_data = {float16(1.0f), float16(0.5f),
+                                     float16(0.33333f), float16(0.0f)};
+  EXPECT_EQ(input_data[0].x, 0x3c00);
+  EXPECT_EQ(input_data[1].x, 0x3800);
+  EXPECT_EQ(input_data[2].x, 0x3555);
+  EXPECT_EQ(input_data[3].x, 0x0000);
+
+  lod_tensor.Resize({4, 1});
+  lod_tensor.set_lod(framework::LoD({{0, 2, 4}}));
+  float16* data_ptr = lod_tensor.mutable_data<float16>(CPUPlace());
+
+  EXPECT_NE(data_ptr, nullptr);
+  EXPECT_EQ(input_data.size(), static_cast<size_t>(lod_tensor.numel()));
+  for (size_t i = 0; i < input_data.size(); ++i) {
+    data_ptr[i] = input_data[i];
+    EXPECT_EQ(data_ptr[i].x, input_data[i].x);
+  }
+}
+
+}  // namespace platform
 }  // namespace paddle
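
The expected .x values in the new test are simply the IEEE 754 binary16 encodings of the inputs (1 sign bit, 5 exponent bits with bias 15, 10 mantissa bits). Below is a standalone decoder sketch, independent of Paddle's float16, that shows where 0x3c00, 0x3800, and 0x3555 come from.

#include <cmath>
#include <cstdint>
#include <cstdio>

// Decode an IEEE 754 binary16 bit pattern into a float.
float half_bits_to_float(uint16_t h) {
  const int sign = (h >> 15) & 0x1;
  const int exponent = (h >> 10) & 0x1f;
  const int mantissa = h & 0x3ff;
  float value;
  if (exponent == 0) {
    // Subnormal (or zero): no implicit leading 1, effective scale 2^-24.
    value = std::ldexp(static_cast<float>(mantissa), -24);
  } else if (exponent == 0x1f) {
    // Inf/NaN range; not needed for these test values.
    value = (mantissa == 0) ? INFINITY : NAN;
  } else {
    // Normal: implicit leading 1, exponent bias 15, 10 fraction bits.
    value = std::ldexp(static_cast<float>(0x400 | mantissa), exponent - 15 - 10);
  }
  return sign ? -value : value;
}

int main() {
  std::printf("0x3c00 -> %f\n", half_bits_to_float(0x3c00));  // 1.000000
  std::printf("0x3800 -> %f\n", half_bits_to_float(0x3800));  // 0.500000
  std::printf("0x3555 -> %f\n", half_bits_to_float(0x3555));  // ~0.333252, nearest half to 0.33333
  return 0;
}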

paddle/fluid/platform/float16_test.cu

Lines changed: 33 additions & 0 deletions
@@ -13,6 +13,8 @@ limitations under the License. */
 
 #include <gtest/gtest.h>
 
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/utils/Logging.h"
 
 #define ARITHMETIC_KERNEL(op_type, sign) \
@@ -108,6 +110,7 @@ limitations under the License. */
 
 #ifdef PADDLE_CUDA_FP16
 namespace paddle {
+namespace platform {
 
 #if CUDA_VERSION < 9000
 ARITHMETIC_KERNEL(Add, +)
@@ -209,5 +212,35 @@ TEST(float16, conversion_on_gpu) {
   EXPECT_EQ(v_assign.x, 0x3c00);
 }
 
+TEST(float16, lod_tensor_on_gpu) {
+  framework::LoDTensor src_tensor;
+  framework::LoDTensor gpu_tensor;
+  framework::LoDTensor dst_tensor;
+
+  float16* src_ptr = src_tensor.mutable_data<float16>(
+      framework::make_ddim({2, 2}), CPUPlace());
+
+  float16 arr[4] = {float16(1.0f), float16(0.5f), float16(0.33333f),
+                    float16(0.0f)};
+  memcpy(src_ptr, arr, 4 * sizeof(float16));
+
+  // CPU LoDTensor to GPU LoDTensor
+  CUDAPlace gpu_place(0);
+  CUDADeviceContext gpu_ctx(gpu_place);
+  framework::TensorCopy(src_tensor, gpu_place, gpu_ctx, &gpu_tensor);
+
+  // GPU LoDTensor to CPU LoDTensor
+  framework::TensorCopy(gpu_tensor, CPUPlace(), gpu_ctx, &dst_tensor);
+
+  // Sync before comparing LoDTensors
+  gpu_ctx.Wait();
+  const float16* dst_ptr = dst_tensor.data<float16>();
+  ASSERT_NE(src_ptr, dst_ptr);
+  for (size_t i = 0; i < 4; ++i) {
+    EXPECT_EQ(src_ptr[i].x, dst_ptr[i].x);
+  }
+}
+
+}  // namespace platform
 }  // namespace paddle
 #endif  // PADDLE_CUDA_FP16
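
The gpu_ctx.Wait() call in the test suggests the copies are asynchronous with respect to the host, so the host must synchronize before reading dst_tensor back. Below is a bare CUDA runtime sketch of the same CPU → GPU → CPU round trip for raw binary16 bit patterns, using plain cudaMemcpyAsync on a stream instead of Paddle's TensorCopy; buffer and variable names are illustrative and error checking is omitted for brevity.

#include <cstdint>
#include <cstdio>
#include <cuda_runtime.h>

int main() {
  const uint16_t src[4] = {0x3c00, 0x3800, 0x3555, 0x0000};  // 1.0, 0.5, ~1/3, 0.0
  uint16_t dst[4] = {0, 0, 0, 0};

  uint16_t* gpu_buf = nullptr;
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  cudaMalloc(reinterpret_cast<void**>(&gpu_buf), sizeof(src));

  // Host -> device, then device -> host, both enqueued on the same stream.
  cudaMemcpyAsync(gpu_buf, src, sizeof(src), cudaMemcpyHostToDevice, stream);
  cudaMemcpyAsync(dst, gpu_buf, sizeof(src), cudaMemcpyDeviceToHost, stream);

  // Counterpart of gpu_ctx.Wait(): do not touch dst until the stream drains.
  cudaStreamSynchronize(stream);

  for (int i = 0; i < 4; ++i) {
    std::printf("src=0x%04x dst=0x%04x\n", src[i], dst[i]);
  }

  cudaFree(gpu_buf);
  cudaStreamDestroy(stream);
  return 0;
}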
