
Commit 4d49f1d
merge baidu/develop
2 parents 8718966 + f146b03

File tree: 16 files changed, +613 -70 lines

CMakeLists.txt

Lines changed: 2 additions & 1 deletion

@@ -137,7 +137,8 @@ if(WITH_GPU)
 endif(WITH_GPU)
 
 if(USE_NNPACK)
-  list(APPEND EXTERNAL_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB} "rt")
+  include(external/nnpack)
+  list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS})
 endif(USE_NNPACK)
 
 add_subdirectory(proto)
Lines changed: 14 additions & 0 deletions

@@ -7,10 +7,24 @@ set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK")
 find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include)
 find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib)
 find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib)
+find_library(NNPACK_UKERNELS_LIB NAMES nnpack_ukernels PATHS ${NNPACK_ROOT}/lib)
+find_library(NNPACK_CPUFEATURES_LIB NAMES cpufeatures PATHS ${NNPACK_ROOT}/lib)
 
 if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB)
   set(NNPACK_FOUND ON)
   INCLUDE_DIRECTORIES(${NNPACK_INC_DIR})
+
+  set(NNPACK_LIBS)
+  list(APPEND NNPACK_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB})
+  if (NNPACK_UKERNELS_LIB)
+    list(APPEND NNPACK_LIBS ${NNPACK_UKERNELS_LIB})
+  endif()
+  if (NNPACK_CPUFEATURES_LIB)
+    list(APPEND NNPACK_LIBS ${NNPACK_CPUFEATURES_LIB})
+  endif()
+  if(NOT ANDROID)
+    list(APPEND NNPACK_LIBS "rt")
+  endif()
 else()
   message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})")
 endif()
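Note: NNPACK_LIBS is now assembled conditionally — the ukernels and cpufeatures static libraries only exist in some NNPACK builds, and librt is linked only on non-Android platforms. A quick way to verify the resulting link line is a smoke test against NNPACK's public API; a minimal sketch, assuming only nnpack.h and the documented nnp_initialize()/nnp_deinitialize() entry points:

#include <cstdio>
#include <nnpack.h>

int main() {
  // nnp_initialize() probes CPU features at runtime; if the optional
  // cpufeatures/ukernels libraries above are needed but missing from the
  // link line, this is typically where the build fails to link.
  if (nnp_initialize() != nnp_status_success) {
    std::fprintf(stderr, "NNPACK is not supported on this CPU\n");
    return 1;
  }
  std::puts("NNPACK initialized");
  return nnp_deinitialize() == nnp_status_success ? 0 : 1;
}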

paddle/framework/ddim.cc

Lines changed: 2 additions & 0 deletions

@@ -117,6 +117,8 @@ int DDim::operator[](int idx) const {
   return boost::apply_visitor(DynamicConstIndexer(idx), var);
 }
 
+ssize_t DDim::size() const { return arity(*this); }
+
 bool DDim::operator==(DDim d) const {
   if (var.which() != d.getVar().which()) {
     return false;
paddle/framework/ddim.h

Lines changed: 2 additions & 0 deletions

@@ -65,6 +65,8 @@ struct DDim {
 
   DDimVar getVar() { return var; }
 
+  ssize_t size() const;
+
   bool operator==(DDim d) const;
 
   bool operator!=(DDim d) const;

paddle/framework/ddim_test.cc

Lines changed: 1 addition & 0 deletions

@@ -49,6 +49,7 @@ TEST(DDim, Equality) {
 
   // arity of a DDim
   EXPECT_EQ(paddle::framework::arity(ddim), 3);
+  EXPECT_EQ(ddim.size(), 3);
 
   // product of a DDim
   EXPECT_EQ(paddle::framework::product(vddim), 45);
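Taken together, the three DDim changes add member-function sugar: ddim.size() forwards to the existing free function arity(). A minimal usage sketch, assuming the repository's ddim.h is on the include path (the dimension values are chosen to mirror the test above):

#include "paddle/framework/ddim.h"

using paddle::framework::DDim;
using paddle::framework::make_ddim;

int main() {
  DDim d = make_ddim({9, 1, 5});
  int total = 1;
  for (ssize_t i = 0; i < d.size(); ++i) {  // same bound as arity(d)
    total *= d[i];
  }
  // total is now 45, i.e. product(d); d.size() == 3, i.e. arity(d)
  return 0;
}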

paddle/framework/op_registry.h

Lines changed: 24 additions & 2 deletions

@@ -1,6 +1,7 @@
 #pragma once
 
 #include <algorithm>
+#include <atomic>
 #include <type_traits>
 #include <unordered_map>
 #include <unordered_set>

@@ -214,25 +215,35 @@ class OpRegistry {
   }
 
   static OperatorPtr CreateOp(const OpDesc& op_desc) {
+    //! Create a OpPtr by type.
     std::string op_type = op_desc.type();
     OperatorPtr op(creators().at(op_type)());
+    //! Fill op's data member. Not use constructor because it will be noising
+    //! for Op developer.
     const OpProto& op_proto = protos().at(op_type);
-    // set op's inputs_ from desc.
     op->type_ = op_desc.type();
+    // set op's inputs_ from desc.
     op->inputs_.reserve((size_t)op_desc.inputs_size());
     std::copy(op_desc.inputs().begin(), op_desc.inputs().end(),
               std::back_inserter(op->inputs_));
     // set op's outputs_ from desc.
     op->outputs_.reserve((size_t)op_desc.outputs_size());
     std::copy(op_desc.outputs().begin(), op_desc.outputs().end(),
               std::back_inserter(op->outputs_));
-    // set op's attr;
+
+    //! Fill attrs, and validate attrs.
     for (auto& attr : op_desc.attrs()) {
       op->attrs_[attr.name()] = AttrTypeHelper::GetAttrValue(attr);
     }
     op_checkers().at(op_type).Check(op->attrs_);
+
+    //! Convert Temporary variable name to an unique variable name.
+    GenerateTempVariableName(op.get());
+
     // set argument offsets stored in op.
     CreateInOutOffsetMap(op, op_proto);
+    //! Other op's custom Init for a complex Op. For simple Op, the Init
+    //! method do nothing.
     op->Init();
     return op;
   }

@@ -248,6 +259,17 @@ class OpRegistry {
   };
 
  private:
+  static void GenerateTempVariableName(OperatorBase* op) {
+    static std::atomic<size_t> gUniqId(0UL);
+    for (auto& outname : op->outputs_) {
+      if (outname == OperatorBase::TMP_VAR_NAME()) {
+        outname += op->type_;
+        outname += "@";
+        outname += std::to_string(gUniqId.fetch_add(1));
+      }
+    }
+  }
+
  static std::unordered_map<std::string, OpCreator>& creators() {
    static std::unordered_map<std::string, OpCreator> creators_;
    return creators_;
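The renaming step deserves a standalone illustration: every output whose name equals OperatorBase::TMP_VAR_NAME() ("@TEMP@", see operator.h below) is rewritten to "@TEMP@<op type>@<counter>", with an atomic counter keeping each temporary unique even across threads. A self-contained sketch of just that rule — the op type "add" and output names are hypothetical examples:

#include <atomic>
#include <string>
#include <vector>

int main() {
  static std::atomic<size_t> uniq_id(0UL);  // mirrors gUniqId above
  const std::string type = "add";
  std::vector<std::string> outputs = {"@TEMP@", "Out", "@TEMP@"};
  for (auto& name : outputs) {
    if (name == "@TEMP@") {  // i.e. OperatorBase::TMP_VAR_NAME()
      name += type;
      name += "@";
      name += std::to_string(uniq_id.fetch_add(1));
    }
  }
  // outputs is now {"@TEMP@add@0", "Out", "@TEMP@add@1"}
  return 0;
}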

paddle/framework/operator.cc

Lines changed: 13 additions & 15 deletions

@@ -91,23 +91,21 @@ std::vector<std::string> OperatorBase::Outputs(const std::string& name) const {
 
 std::string OperatorBase::DebugString() const {
   std::stringstream ss;
-  ss << "=================\n";
-  ss << "type = " << type_ << "\n";
-  ss << "inputs = [";
-  for (auto& ipt : inputs_) {
-    ss << ipt << ", ";
+  ss << "Op(" << type_ << "), inputs:(";
+  for (size_t i = 0; i < inputs_.size(); ++i) {
+    ss << inputs_[i];
+    if (i != inputs_.size() - 1) {
+      ss << ", ";
+    }
   }
-  ss << "]\n";
-  ss << "outputs = [";
-  for (auto& opt : outputs_) {
-    ss << opt << ", ";
+  ss << "), outputs:(";
+  for (size_t i = 0; i < outputs_.size(); ++i) {
+    ss << outputs_[i];
+    if (i != outputs_.size() - 1) {
+      ss << ", ";
+    }
   }
-  ss << "]\n";
-  ss << "attr_keys = [";
-  for (auto& attr : attrs_) {
-    ss << attr.first << ", ";
-  }
-  ss << "]\n";
+  ss << ").";
   return ss.str();
 }
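The new DebugString emits a single line — e.g. Op(add), inputs:(A, B), outputs:(Out). — dropping the old multi-line layout and the attr_keys list, and no longer leaving a trailing ", " after the last element. A minimal sketch of the separator logic, with hypothetical op and argument names:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Join with ", " but emit no separator after the last element, exactly as
// the rewritten loops above do.
std::string Join(const std::vector<std::string>& v) {
  std::stringstream ss;
  for (size_t i = 0; i < v.size(); ++i) {
    ss << v[i];
    if (i != v.size() - 1) {
      ss << ", ";
    }
  }
  return ss.str();
}

int main() {
  // Prints: Op(add), inputs:(A, B), outputs:(Out).
  std::cout << "Op(add), inputs:(" << Join({"A", "B"}) << "), outputs:("
            << Join({"Out"}) << ").\n";
  return 0;
}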

paddle/framework/operator.h

Lines changed: 7 additions & 0 deletions

@@ -56,6 +56,13 @@ using OperatorPtr = std::shared_ptr<OperatorBase>;
 */
 class OperatorBase {
  public:
+  /// If a variable is a empty variable, that name will be used.
+  static std::string EMPTY_VAR_NAME() { return "@EMPTY@"; }
+
+  /// If a variable is a temporary variable, that name will be set in Python,
+  /// but it will be convert to a unique name in scope after OpCreator.
+  static std::string TMP_VAR_NAME() { return "@TEMP@"; }
+
  virtual ~OperatorBase() {}
 
  template <typename T>
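A note on the form: the sentinels are inline static member functions returning the literal rather than class-level string constants, which (among other things) avoids cross-translation-unit static initialization order issues in a header-only declaration. A small sketch of the intended contract, using local stand-ins for the two helpers:

#include <cassert>
#include <string>

// Stand-ins mirroring the declarations above; in the real code these are
// static member functions of OperatorBase.
std::string EMPTY_VAR_NAME() { return "@EMPTY@"; }
std::string TMP_VAR_NAME() { return "@TEMP@"; }

int main() {
  // An argument deliberately left unset is wired to the empty sentinel.
  std::string unset_input = EMPTY_VAR_NAME();
  assert(unset_input == "@EMPTY@");
  // A temporary output starts out as "@TEMP@" on the Python side and is
  // later renamed to a unique name by OpRegistry::CreateOp above.
  std::string tmp_output = TMP_VAR_NAME();
  assert(tmp_output == "@TEMP@");
  return 0;
}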

paddle/framework/tensor.h

Lines changed: 24 additions & 31 deletions

@@ -29,9 +29,7 @@ namespace framework {
 
 class Tensor {
  public:
-  Tensor() : numel_(0), offset_(0) {}
-
-  Tensor& operator=(const Tensor& src) = delete;
+  Tensor() : offset_(0) {}
 
   template <typename T>
   const T* data() const {

@@ -48,34 +46,33 @@ class Tensor {
   }
 
   template <typename T>
-  T* mutable_data(DDim dims, paddle::platform::Place place) {
+  T* mutable_data(DDim dims, platform::Place place) {
     set_dims(dims);
     return mutable_data<T>(place);
   }
 
   template <typename T>
-  T* mutable_data(paddle::platform::Place place) {
-    PADDLE_ENFORCE(numel_ > 0,
-                   "Tensor::numel_ must be larger than zero to call "
+  T* mutable_data(platform::Place place) {
+    PADDLE_ENFORCE(product(dims_) > 0,
+                   "Tensor's numel must be larger than zero to call "
                    "Tensor::mutable_data. Call Tensor::set_dim first.");
     if (holder_ == nullptr ||
         !(holder_->place() ==
          place) /* some versions of boost::variant don't have operator!= */
-        || holder_->size() < numel_ * sizeof(T) + offset_) {
+        || holder_->size() < product(dims_) * sizeof(T) + offset_) {
       if (platform::is_cpu_place(place)) {
         holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
-            boost::get<platform::CPUPlace>(place), numel_ * sizeof(T)));
-      }
+            boost::get<platform::CPUPlace>(place), product(dims_) * sizeof(T)));
+      } else if (platform::is_gpu_place(place)) {
 #ifdef __CUDACC__
-      else if (platform::is_gpu_place(place)) {
         holder_.reset(new PlaceholderImpl<T, platform::GPUPlace>(
-            boost::get<platform::GPUPlace>(place), numel_ * sizeof(T)));
-      }
+            boost::get<platform::GPUPlace>(place), product(dims_) * sizeof(T)));
 #else
-      else if (platform::is_gpu_place(place)) {
-        PADDLE_ENFORCE(true, "GPU not support!");
-      }
+        PADDLE_ENFORCE(true, "'GPUPlace' is not supported in CPU only device.");
 #endif
+      } else {
+        PADDLE_ENFORCE(true, "Unknown 'place'.");
+      }
       offset_ = 0;
     }
     return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +

@@ -98,7 +95,7 @@ class Tensor {
   // flat to rank = 1
   template <typename T>
   typename TTypes<T>::Flat flat() {
-    return shaped<T, 1>(make_ddim({static_cast<int>(numel_)}));
+    return shaped<T, 1>(make_ddim({static_cast<int>(product(dims_))}));
   }
 
   // to TensorType Vec

@@ -129,7 +126,7 @@ class Tensor {
 
   template <typename T>
   typename TTypes<T>::ConstFlat flat() const {
-    return shaped<T, 1>(make_ddim({static_cast<int>(numel_)}));
+    return shaped<T, 1>(make_ddim({static_cast<int>(product(dims_))}));
   }
 
   template <typename T>

@@ -151,12 +148,12 @@ class Tensor {
   }
 
   template <typename T>
-  void CopyFrom(const Tensor& src, paddle::platform::Place dst_place) {
+  void CopyFrom(const Tensor& src, platform::Place dst_place) {
     PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) &&
                        platform::is_cpu_place(dst_place),
                    "Tensor::CopyFrom only support CPU now.");
     src.CheckDims<T>();
-    size_t size = src.numel_ * sizeof(T);
+    size_t size = product(src.dims_) * sizeof(T);
     set_dims(src.dims());
     const void* src_ptr = static_cast<const void*>(src.data<T>());
     void* dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));

@@ -190,7 +187,6 @@ class Tensor {
       return;
     }
     dims_ = dims;
-    numel_ = product(dims_);
   }
 
   DDim dims() const { return dims_; }

@@ -201,7 +197,7 @@ class Tensor {
   struct Placeholder {
     virtual ~Placeholder() {}
     virtual void* ptr() const = 0;
-    virtual paddle::platform::Place place() const = 0;
+    virtual platform::Place place() const = 0;
     virtual size_t size() const = 0;
   };

@@ -212,42 +208,39 @@ class Tensor {
    class Deleter {
     public:
      Deleter(PType place) : place_(place) {}
-      void operator()(T* ptr) {
-        paddle::memory::Free(place_, static_cast<void*>(ptr));
-      }
+      void operator()(T* ptr) { memory::Free(place_, static_cast<void*>(ptr)); }
 
     private:
      PType place_;
    };
 
   public:
    PlaceholderImpl(PlaceType place, size_t size)
-        : ptr_(static_cast<T*>(paddle::memory::Alloc(place, size)),
+        : ptr_(static_cast<T*>(memory::Alloc(place, size)),
               Deleter<PlaceType>(place)),
          place_(place),
          size_(size) {}
 
    virtual void* ptr() const { return static_cast<void*>(ptr_.get()); }
    virtual size_t size() const { return size_; }
-    virtual paddle::platform::Place place() const { return place_; }
+    virtual platform::Place place() const { return place_; }
 
    std::unique_ptr<T, Deleter<PlaceType>> ptr_;
-    paddle::platform::Place place_;  // record the place of ptr_.
-    size_t size_;                    // size of the memory block.
+    platform::Place place_;  // record the place of ptr_.
+    size_t size_;            // size of the memory block.
  };
 
  template <typename T>
  inline void CheckDims() const {
    PADDLE_ENFORCE(holder_ != nullptr,
                   "Tenosr holds no memory. Call Tensor::mutable_data first.");
-    PADDLE_ENFORCE(holder_->size() >= numel_ * sizeof(T) + offset_,
+    PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_,
                   "Tensor's dims_ is out of bound. Call Tensor::mutable_data "
                   "first to re-allocate memory.");
  }
 
  std::shared_ptr<Placeholder> holder_;  // holds the memory block if allocated.
  DDim dims_;
-  size_t numel_;   // cache of `product(dims_)`
  size_t offset_;  // marks the begin of tensor data area.
};
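The through-line of this diff is the removal of the numel_ cache: every consumer now recomputes product(dims_) on demand, so the element count can never go stale relative to dims_ (the deleted copy-assignment ban also disappears, so Tensors can share a holder_ on assignment). The reallocation rule in mutable_data is worth restating in isolation: keep the current buffer unless it is absent, on the wrong place, or smaller than product(dims_) * sizeof(T) + offset_, and reset offset_ after any fresh allocation. A standalone sketch of that rule, using plain heap memory instead of paddle::memory and hypothetical names:

#include <cstddef>
#include <functional>
#include <memory>
#include <numeric>
#include <vector>

struct Buffer {
  std::unique_ptr<char[]> data;
  std::size_t size = 0;
};

template <typename T>
T* MutableData(Buffer& buf, const std::vector<int>& dims, std::size_t& offset) {
  const std::size_t numel = std::accumulate(
      dims.begin(), dims.end(), std::size_t{1}, std::multiplies<std::size_t>());
  // Reallocate only when the existing block cannot hold the requested view.
  if (buf.data == nullptr || buf.size < numel * sizeof(T) + offset) {
    buf.size = numel * sizeof(T);
    buf.data.reset(new char[buf.size]);
    offset = 0;  // as in Tensor::mutable_data, a fresh block resets the offset
  }
  return reinterpret_cast<T*>(buf.data.get() + offset);
}

int main() {
  Buffer buf;
  std::size_t offset = 0;
  float* p = MutableData<float>(buf, {2, 3}, offset);  // allocates 24 bytes
  float* q = MutableData<float>(buf, {3, 2}, offset);  // same size: block reused
  return (p == q) ? 0 : 1;
}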
