
Commit 4d49f1d
merge baidu/develop
2 parents 8718966 + f146b03

File tree: 16 files changed, +613 -70 lines

CMakeLists.txt

Lines changed: 2 additions & 1 deletion

@@ -137,7 +137,8 @@ if(WITH_GPU)
 endif(WITH_GPU)
 
 if(USE_NNPACK)
-  list(APPEND EXTERNAL_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB} "rt")
+  include(external/nnpack)
+  list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS})
 endif(USE_NNPACK)
 
 add_subdirectory(proto)
Lines changed: 14 additions & 0 deletions

@@ -7,10 +7,24 @@ set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK")
 find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include)
 find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib)
 find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib)
+find_library(NNPACK_UKERNELS_LIB NAMES nnpack_ukernels PATHS ${NNPACK_ROOT}/lib)
+find_library(NNPACK_CPUFEATURES_LIB NAMES cpufeatures PATHS ${NNPACK_ROOT}/lib)
 
 if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB)
   set(NNPACK_FOUND ON)
   INCLUDE_DIRECTORIES(${NNPACK_INC_DIR})
+
+  set(NNPACK_LIBS)
+  list(APPEND NNPACK_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB})
+  if (NNPACK_UKERNELS_LIB)
+    list(APPEND NNPACK_LIBS ${NNPACK_UKERNELS_LIB})
+  endif()
+  if (NNPACK_CPUFEATURES_LIB)
+    list(APPEND NNPACK_LIBS ${NNPACK_CPUFEATURES_LIB})
+  endif()
+  if(NOT ANDROID)
+    list(APPEND NNPACK_LIBS "rt")
+  endif()
 else()
   message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})")
 endif()
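Note: NNPACK_LIBS is now assembled conditionally — the ukernels and cpufeatures static libraries only exist in some NNPACK builds, and librt is linked only on non-Android platforms. A quick way to verify the resulting link line is a smoke test against NNPACK's public API; a minimal sketch, assuming only nnpack.h and the documented nnp_initialize()/nnp_deinitialize() entry points:

#include <cstdio>
#include <nnpack.h>

int main() {
  // nnp_initialize() probes CPU features at runtime; if the optional
  // cpufeatures/ukernels libraries above are needed but missing from the
  // link line, this is typically where the build fails to link.
  if (nnp_initialize() != nnp_status_success) {
    std::fprintf(stderr, "NNPACK is not supported on this CPU\n");
    return 1;
  }
  std::puts("NNPACK initialized");
  return nnp_deinitialize() == nnp_status_success ? 0 : 1;
}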

paddle/framework/ddim.cc

Lines changed: 2 additions & 0 deletions

@@ -117,6 +117,8 @@ int DDim::operator[](int idx) const {
   return boost::apply_visitor(DynamicConstIndexer(idx), var);
 }
 
+ssize_t DDim::size() const { return arity(*this); }
+
 bool DDim::operator==(DDim d) const {
   if (var.which() != d.getVar().which()) {
     return false;
paddle/framework/ddim.h

Lines changed: 2 additions & 0 deletions

@@ -65,6 +65,8 @@ struct DDim {
 
   DDimVar getVar() { return var; }
 
+  ssize_t size() const;
+
   bool operator==(DDim d) const;
 
   bool operator!=(DDim d) const;

paddle/framework/ddim_test.cc

Lines changed: 1 addition & 0 deletions

@@ -49,6 +49,7 @@ TEST(DDim, Equality) {
 
   // arity of a DDim
   EXPECT_EQ(paddle::framework::arity(ddim), 3);
+  EXPECT_EQ(ddim.size(), 3);
 
   // product of a DDim
   EXPECT_EQ(paddle::framework::product(vddim), 45);
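Taken together, the three DDim changes add member-function sugar: ddim.size() forwards to the existing free function arity(). A minimal usage sketch, assuming the repository's ddim.h is on the include path (the dimension values are chosen to mirror the test above):

#include "paddle/framework/ddim.h"

using paddle::framework::DDim;
using paddle::framework::make_ddim;

int main() {
  DDim d = make_ddim({9, 1, 5});
  int total = 1;
  for (ssize_t i = 0; i < d.size(); ++i) {  // same bound as arity(d)
    total *= d[i];
  }
  // total is now 45, i.e. product(d); d.size() == 3, i.e. arity(d)
  return 0;
}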

paddle/framework/op_registry.h

Lines changed: 24 additions & 2 deletions

@@ -1,6 +1,7 @@
 #pragma once
 
 #include <algorithm>
+#include <atomic>
 #include <type_traits>
 #include <unordered_map>
 #include <unordered_set>

@@ -214,25 +215,35 @@ class OpRegistry {
   }
 
   static OperatorPtr CreateOp(const OpDesc& op_desc) {
+    //! Create a OpPtr by type.
     std::string op_type = op_desc.type();
     OperatorPtr op(creators().at(op_type)());
+    //! Fill op's data member. Not use constructor because it will be noising
+    //! for Op developer.
     const OpProto& op_proto = protos().at(op_type);
-    // set op's inputs_ from desc.
     op->type_ = op_desc.type();
+    // set op's inputs_ from desc.
     op->inputs_.reserve((size_t)op_desc.inputs_size());
     std::copy(op_desc.inputs().begin(), op_desc.inputs().end(),
               std::back_inserter(op->inputs_));
     // set op's outputs_ from desc.
     op->outputs_.reserve((size_t)op_desc.outputs_size());
     std::copy(op_desc.outputs().begin(), op_desc.outputs().end(),
               std::back_inserter(op->outputs_));
-    // set op's attr;
+
+    //! Fill attrs, and validate attrs.
     for (auto& attr : op_desc.attrs()) {
       op->attrs_[attr.name()] = AttrTypeHelper::GetAttrValue(attr);
     }
     op_checkers().at(op_type).Check(op->attrs_);
+
+    //! Convert Temporary variable name to an unique variable name.
+    GenerateTempVariableName(op.get());
+
     // set argument offsets stored in op.
     CreateInOutOffsetMap(op, op_proto);
+    //! Other op's custom Init for a complex Op. For simple Op, the Init
+    //! method do nothing.
     op->Init();
     return op;
   }

@@ -248,6 +259,17 @@ class OpRegistry {
   };
 
  private:
+  static void GenerateTempVariableName(OperatorBase* op) {
+    static std::atomic<size_t> gUniqId(0UL);
+    for (auto& outname : op->outputs_) {
+      if (outname == OperatorBase::TMP_VAR_NAME()) {
+        outname += op->type_;
+        outname += "@";
+        outname += std::to_string(gUniqId.fetch_add(1));
+      }
+    }
+  }
+
  static std::unordered_map<std::string, OpCreator>& creators() {
    static std::unordered_map<std::string, OpCreator> creators_;
    return creators_;
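The renaming step deserves a standalone illustration: every output whose name equals OperatorBase::TMP_VAR_NAME() ("@TEMP@", see operator.h below) is rewritten to "@TEMP@<op type>@<counter>", with an atomic counter keeping each temporary unique even across threads. A self-contained sketch of just that rule — the op type "add" and output names are hypothetical examples:

#include <atomic>
#include <string>
#include <vector>

int main() {
  static std::atomic<size_t> uniq_id(0UL);  // mirrors gUniqId above
  const std::string type = "add";
  std::vector<std::string> outputs = {"@TEMP@", "Out", "@TEMP@"};
  for (auto& name : outputs) {
    if (name == "@TEMP@") {  // i.e. OperatorBase::TMP_VAR_NAME()
      name += type;
      name += "@";
      name += std::to_string(uniq_id.fetch_add(1));
    }
  }
  // outputs is now {"@TEMP@add@0", "Out", "@TEMP@add@1"}
  return 0;
}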

paddle/framework/operator.cc

Lines changed: 13 additions & 15 deletions

@@ -91,23 +91,21 @@ std::vector<std::string> OperatorBase::Outputs(const std::string& name) const {
 
 std::string OperatorBase::DebugString() const {
   std::stringstream ss;
-  ss << "=================\n";
-  ss << "type = " << type_ << "\n";
-  ss << "inputs = [";
-  for (auto& ipt : inputs_) {
-    ss << ipt << ", ";
+  ss << "Op(" << type_ << "), inputs:(";
+  for (size_t i = 0; i < inputs_.size(); ++i) {
+    ss << inputs_[i];
+    if (i != inputs_.size() - 1) {
+      ss << ", ";
+    }
   }
-  ss << "]\n";
-  ss << "outputs = [";
-  for (auto& opt : outputs_) {
-    ss << opt << ", ";
+  ss << "), outputs:(";
+  for (size_t i = 0; i < outputs_.size(); ++i) {
+    ss << outputs_[i];
+    if (i != outputs_.size() - 1) {
+      ss << ", ";
+    }
   }
-  ss << "]\n";
-  ss << "attr_keys = [";
-  for (auto& attr : attrs_) {
-    ss << attr.first << ", ";
-  }
-  ss << "]\n";
+  ss << ").";
   return ss.str();
 }
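The new DebugString emits a single line — e.g. Op(add), inputs:(A, B), outputs:(Out). — dropping the old multi-line layout and the attr_keys list, and no longer leaving a trailing ", " after the last element. A minimal sketch of the separator logic, with hypothetical op and argument names:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Join with ", " but emit no separator after the last element, exactly as
// the rewritten loops above do.
std::string Join(const std::vector<std::string>& v) {
  std::stringstream ss;
  for (size_t i = 0; i < v.size(); ++i) {
    ss << v[i];
    if (i != v.size() - 1) {
      ss << ", ";
    }
  }
  return ss.str();
}

int main() {
  // Prints: Op(add), inputs:(A, B), outputs:(Out).
  std::cout << "Op(add), inputs:(" << Join({"A", "B"}) << "), outputs:("
            << Join({"Out"}) << ").\n";
  return 0;
}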

paddle/framework/operator.h

Lines changed: 7 additions & 0 deletions

@@ -56,6 +56,13 @@ using OperatorPtr = std::shared_ptr<OperatorBase>;
 */
 class OperatorBase {
  public:
+  /// If a variable is a empty variable, that name will be used.
+  static std::string EMPTY_VAR_NAME() { return "@EMPTY@"; }
+
+  /// If a variable is a temporary variable, that name will be set in Python,
+  /// but it will be convert to a unique name in scope after OpCreator.
+  static std::string TMP_VAR_NAME() { return "@TEMP@"; }
+
  virtual ~OperatorBase() {}
 
  template <typename T>
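A note on the form: the sentinels are inline static member functions returning the literal rather than class-level string constants, which (among other things) avoids cross-translation-unit static initialization order issues in a header-only declaration. A small sketch of the intended contract, using local stand-ins for the two helpers:

#include <cassert>
#include <string>

// Stand-ins mirroring the declarations above; in the real code these are
// static member functions of OperatorBase.
std::string EMPTY_VAR_NAME() { return "@EMPTY@"; }
std::string TMP_VAR_NAME() { return "@TEMP@"; }

int main() {
  // An argument deliberately left unset is wired to the empty sentinel.
  std::string unset_input = EMPTY_VAR_NAME();
  assert(unset_input == "@EMPTY@");
  // A temporary output starts out as "@TEMP@" on the Python side and is
  // later renamed to a unique name by OpRegistry::CreateOp above.
  std::string tmp_output = TMP_VAR_NAME();
  assert(tmp_output == "@TEMP@");
  return 0;
}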

paddle/framework/tensor.h

Lines changed: 24 additions & 31 deletions

@@ -29,9 +29,7 @@ namespace framework {
 
 class Tensor {
  public:
-  Tensor() : numel_(0), offset_(0) {}
-
-  Tensor& operator=(const Tensor& src) = delete;
+  Tensor() : offset_(0) {}
 
   template <typename T>
   const T* data() const {

@@ -48,34 +46,33 @@ class Tensor {
   }
 
   template <typename T>
-  T* mutable_data(DDim dims, paddle::platform::Place place) {
+  T* mutable_data(DDim dims, platform::Place place) {
     set_dims(dims);
     return mutable_data<T>(place);
   }
 
   template <typename T>
-  T* mutable_data(paddle::platform::Place place) {
-    PADDLE_ENFORCE(numel_ > 0,
-                   "Tensor::numel_ must be larger than zero to call "
+  T* mutable_data(platform::Place place) {
+    PADDLE_ENFORCE(product(dims_) > 0,
+                   "Tensor's numel must be larger than zero to call "
                    "Tensor::mutable_data. Call Tensor::set_dim first.");
     if (holder_ == nullptr ||
         !(holder_->place() ==
          place) /* some versions of boost::variant don't have operator!= */
-        || holder_->size() < numel_ * sizeof(T) + offset_) {
+        || holder_->size() < product(dims_) * sizeof(T) + offset_) {
       if (platform::is_cpu_place(place)) {
         holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
-            boost::get<platform::CPUPlace>(place), numel_ * sizeof(T)));
-      }
+            boost::get<platform::CPUPlace>(place), product(dims_) * sizeof(T)));
+      } else if (platform::is_gpu_place(place)) {
 #ifdef __CUDACC__
-      else if (platform::is_gpu_place(place)) {
         holder_.reset(new PlaceholderImpl<T, platform::GPUPlace>(
-            boost::get<platform::GPUPlace>(place), numel_ * sizeof(T)));
-      }
+            boost::get<platform::GPUPlace>(place), product(dims_) * sizeof(T)));
 #else
-      else if (platform::is_gpu_place(place)) {
-        PADDLE_ENFORCE(true, "GPU not support!");
-      }
+        PADDLE_ENFORCE(true, "'GPUPlace' is not supported in CPU only device.");
 #endif
+      } else {
+        PADDLE_ENFORCE(true, "Unknown 'place'.");
+      }
       offset_ = 0;
     }
     return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +

@@ -98,7 +95,7 @@ class Tensor {
   // flat to rank = 1
   template <typename T>
   typename TTypes<T>::Flat flat() {
-    return shaped<T, 1>(make_ddim({static_cast<int>(numel_)}));
+    return shaped<T, 1>(make_ddim({static_cast<int>(product(dims_))}));
   }
 
   // to TensorType Vec

@@ -129,7 +126,7 @@ class Tensor {
 
   template <typename T>
   typename TTypes<T>::ConstFlat flat() const {
-    return shaped<T, 1>(make_ddim({static_cast<int>(numel_)}));
+    return shaped<T, 1>(make_ddim({static_cast<int>(product(dims_))}));
   }
 
   template <typename T>

@@ -151,12 +148,12 @@ class Tensor {
   }
 
   template <typename T>
-  void CopyFrom(const Tensor& src, paddle::platform::Place dst_place) {
+  void CopyFrom(const Tensor& src, platform::Place dst_place) {
     PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) &&
                        platform::is_cpu_place(dst_place),
                    "Tensor::CopyFrom only support CPU now.");
     src.CheckDims<T>();
-    size_t size = src.numel_ * sizeof(T);
+    size_t size = product(src.dims_) * sizeof(T);
     set_dims(src.dims());
     const void* src_ptr = static_cast<const void*>(src.data<T>());
     void* dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));

@@ -190,7 +187,6 @@ class Tensor {
       return;
     }
     dims_ = dims;
-    numel_ = product(dims_);
   }
 
   DDim dims() const { return dims_; }

@@ -201,7 +197,7 @@ class Tensor {
   struct Placeholder {
     virtual ~Placeholder() {}
     virtual void* ptr() const = 0;
-    virtual paddle::platform::Place place() const = 0;
+    virtual platform::Place place() const = 0;
     virtual size_t size() const = 0;
   };

@@ -212,42 +208,39 @@ class Tensor {
    class Deleter {
     public:
      Deleter(PType place) : place_(place) {}
-      void operator()(T* ptr) {
-        paddle::memory::Free(place_, static_cast<void*>(ptr));
-      }
+      void operator()(T* ptr) { memory::Free(place_, static_cast<void*>(ptr)); }
 
     private:
      PType place_;
    };
 
   public:
    PlaceholderImpl(PlaceType place, size_t size)
-        : ptr_(static_cast<T*>(paddle::memory::Alloc(place, size)),
+        : ptr_(static_cast<T*>(memory::Alloc(place, size)),
               Deleter<PlaceType>(place)),
          place_(place),
          size_(size) {}
 
    virtual void* ptr() const { return static_cast<void*>(ptr_.get()); }
    virtual size_t size() const { return size_; }
-    virtual paddle::platform::Place place() const { return place_; }
+    virtual platform::Place place() const { return place_; }
 
    std::unique_ptr<T, Deleter<PlaceType>> ptr_;
-    paddle::platform::Place place_;  // record the place of ptr_.
-    size_t size_;                    // size of the memory block.
+    platform::Place place_;  // record the place of ptr_.
+    size_t size_;            // size of the memory block.
  };
 
  template <typename T>
  inline void CheckDims() const {
    PADDLE_ENFORCE(holder_ != nullptr,
                   "Tenosr holds no memory. Call Tensor::mutable_data first.");
-    PADDLE_ENFORCE(holder_->size() >= numel_ * sizeof(T) + offset_,
+    PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_,
                   "Tensor's dims_ is out of bound. Call Tensor::mutable_data "
                   "first to re-allocate memory.");
  }
 
  std::shared_ptr<Placeholder> holder_;  // holds the memory block if allocated.
  DDim dims_;
-  size_t numel_;   // cache of `product(dims_)`
  size_t offset_;  // marks the begin of tensor data area.
};
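The through-line of this diff is the removal of the numel_ cache: every consumer now recomputes product(dims_) on demand, so the element count can never go stale relative to dims_ (the deleted copy-assignment ban also disappears, so Tensors can share a holder_ on assignment). The reallocation rule in mutable_data is worth restating in isolation: keep the current buffer unless it is absent, on the wrong place, or smaller than product(dims_) * sizeof(T) + offset_, and reset offset_ after any fresh allocation. A standalone sketch of that rule, using plain heap memory instead of paddle::memory and hypothetical names:

#include <cstddef>
#include <functional>
#include <memory>
#include <numeric>
#include <vector>

struct Buffer {
  std::unique_ptr<char[]> data;
  std::size_t size = 0;
};

template <typename T>
T* MutableData(Buffer& buf, const std::vector<int>& dims, std::size_t& offset) {
  const std::size_t numel = std::accumulate(
      dims.begin(), dims.end(), std::size_t{1}, std::multiplies<std::size_t>());
  // Reallocate only when the existing block cannot hold the requested view.
  if (buf.data == nullptr || buf.size < numel * sizeof(T) + offset) {
    buf.size = numel * sizeof(T);
    buf.data.reset(new char[buf.size]);
    offset = 0;  // as in Tensor::mutable_data, a fresh block resets the offset
  }
  return reinterpret_cast<T*>(buf.data.get() + offset);
}

int main() {
  Buffer buf;
  std::size_t offset = 0;
  float* p = MutableData<float>(buf, {2, 3}, offset);  // allocates 24 bytes
  float* q = MutableData<float>(buf, {3, 2}, offset);  // same size: block reused
  return (p == q) ? 0 : 1;
}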
