PaddlePaddle · wawltor · Aug 24, 2020 · Aug 13, 2020 · Aug 14, 2020 · Aug 14, 2020
diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc
@@ -350,6 +350,74 @@ inline void Any(const framework::Tensor& tensor, Predicate predicate,
   platform::VisitPlace(place, visitor);
 }
 
+// Data-type visitor behind AllImpl(): for the element type T it is invoked
+// with, it applies predicate_ to the flattened input and stores the result in
+// the flattened bool output, evaluated on ctx_'s Eigen device. It only holds
+// references to the tensor and context, so it must not outlive them.
+template <typename Predicate, typename DevCtx>
+struct AllDTypeVisitor {
+  Predicate predicate_;
+  const Tensor& tensor_;
+  const DevCtx& ctx_;
+  Tensor* out_;
+
+  AllDTypeVisitor(Predicate predicate, const Tensor& tensor, const DevCtx& ctx,
+                  Tensor* out)
+      : predicate_(predicate), tensor_(tensor), ctx_(ctx), out_(out) {}
+
+  template <typename T>
+  void apply() const {
+    auto t = EigenVector<T>::Flatten(tensor_);
+    auto o = EigenVector<bool>::Flatten(*out_);
+    o.device(*ctx_.eigen_device()) = predicate_(t);
+  }
+};
+
+// Runs AllDTypeVisitor for the runtime element type of `tensor`.
+template <typename Predicate, typename DevCtx>
+inline void AllImpl(Predicate predicate, const framework::Tensor& tensor,
+                    const DevCtx& ctx, framework::Tensor* out) {
+  VisitDataType(tensor.type(), AllDTypeVisitor<Predicate, DevCtx>(
+                                   predicate, tensor, ctx, out));
+}
+
+// Place visitor behind All(): resizes `out` to the input's dims, allocates it
+// as bool on the visited place, then evaluates the predicate through AllImpl
+// with that place's device context.
+template <typename Predicate>
+class AllOutVisitor : public boost::static_visitor<> {
+ private:
+  const framework::Tensor& tensor_;
+  mutable framework::Tensor* out_;
+  Predicate predicate_;
+
+ public:
+  // The initializer list follows the member declaration order above, which is
+  // the order the members are actually initialized in (avoids -Wreorder).
+  AllOutVisitor(const framework::Tensor& tensor, Predicate predicate,
+                framework::Tensor* out)
+      : tensor_(tensor), out_(out), predicate_(predicate) {}
+
+  template <typename Place>
+  void operator()(const Place& place) const {
+    auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place);
+    out_->Resize(tensor_.dims());
+    out_->mutable_data<bool>(place);
+    AllImpl(predicate_, tensor_, *ctx, out_);
+  }
+};
+
+// Evaluates `predicate` on the flattened `tensor` and writes the bool result
+// to `out`; `out` is resized to tensor.dims() and allocated on tensor.place()
+// by AllOutVisitor.
+template <typename Predicate>
+inline void All(const framework::Tensor& tensor, Predicate predicate,
+                framework::Tensor* out) {
+  AllOutVisitor<Predicate> visitor(tensor, predicate, out);
+  auto place = tensor.place();
+  platform::VisitPlace(place, visitor);
+}
+
 struct ContainsNANPredicate {
   template <typename T>
   auto operator()(const T& eigen_vec) const
@@ -370,6 +425,12 @@ void TensorContainsNAN(const framework::Tensor& tensor,
   Any(tensor, predicate, out);
 }
 
+// Runs ContainsNANPredicate over `tensor` via All(); bool result -> `out`.
+void TensorContainsNANV2(const framework::Tensor& tensor,
+                         framework::Tensor* out) {
+  All(tensor, ContainsNANPredicate(), out);
+}
+
 struct ContainsInfPredicate {
   template <typename T>
   auto operator()(const T& eigen_vec) const
@@ -390,6 +451,12 @@ void TensorContainsInf(const framework::Tensor& tensor,
   Any(tensor, predicate, out);
 }
 
+// Runs ContainsInfPredicate over `tensor` via All(); bool result -> `out`.
+void TensorContainsInfV2(const framework::Tensor& tensor,
+                         framework::Tensor* out) {
+  All(tensor, ContainsInfPredicate(), out);
+}
+
 // NOTE(dzhwinter):
 // Isfinite need a AllVisitor to loop through all the elements.
 // We choose two cuda call instead of one allvisitor. The AllVisitor
@@ -402,8 +469,9 @@ bool TensorIsfinite(const framework::Tensor& tensor) {
 
 #ifdef PADDLE_WITH_CUDA
 template <typename T>
-static inline void __global__ BothFalse(const T* cmp, T* out) {
-  out[0] = (!cmp[0]) && (!out[0]);
+static inline void __global__ BothFalse(const T* cmp, T* out, int element_num) {
+  // out[i] ends up true only where cmp[i] and out[i] were both false.
+  CUDA_KERNEL_LOOP(i, element_num) { out[i] = !(cmp[i] || out[i]); }
 }
 #endif
 
@@ -421,22 +488,51 @@ struct BothFalseVisitor : public boost::static_visitor<> {
   void VisitorImpl(const platform::CUDAPlace& gpu) const {
 #ifdef PADDLE_WITH_CUDA
     auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(gpu);
-    BothFalse<bool><<<1, 1, 0, ctx->stream()>>>(in_.data<bool>(),
-                                                out_->mutable_data<bool>(gpu));
+    constexpr int MAX_BLOCK_DIM = 512;
+    constexpr int MAX_GRID_DIM = 65535;
+    // NOTE(review): numel() is stored in an int because the kernel takes an
+    // int element count -- confirm inputs cannot exceed INT_MAX elements.
+    int element_num = in_.numel();
+    // Nothing to combine for an empty tensor; without this guard the launch
+    // below would be requested with zero blocks.
+    if (element_num <= 0) return;
+    // Largest power of two <= element_num, capped at MAX_BLOCK_DIM. Integer
+    // arithmetic only, so no floating-point log2 is involved.
+    int block_size = MAX_BLOCK_DIM;
+    while (block_size > element_num) block_size >>= 1;
+    // grid_size rounds down, so grid_size * block_size may be smaller than
+    // element_num. NOTE(review): this relies on CUDA_KERNEL_LOOP in BothFalse
+    // striding over the remaining elements -- confirm.
+    int grid_size = element_num / block_size;
+    grid_size = (grid_size >= MAX_GRID_DIM) ? MAX_GRID_DIM : grid_size;
+    BothFalse<bool><<<grid_size, block_size, 0, ctx->stream()>>>(
+        in_.data<bool>(), out_->mutable_data<bool>(gpu), element_num);
 #endif
   }
 
   void VisitorImpl(const platform::CPUPlace& cpu) const {
-    bool lhs = !in_.data<bool>()[0];
-    bool rhs = !out_->mutable_data<bool>(cpu)[0];
-    out_->mutable_data<bool>(cpu)[0] = lhs && rhs;
+    const int64_t num = in_.numel();
+    if (num <= 0) return;
+    // Look the buffers up once rather than on every iteration.
+    const bool* in_flags = in_.data<bool>();
+    bool* out_flags = out_->mutable_data<bool>(cpu);
+    for (int64_t i = 0; i < num; ++i) {
+      // out[i] becomes true only where both flags were false.
+      out_flags[i] = (!in_flags[i]) && (!out_flags[i]);
+    }
   }
 
   void VisitorImpl(
       const platform::CUDAPinnedPlace& cpu /* equals to cpu*/) const {
-    bool lhs = !in_.data<bool>()[0];
-    bool rhs = !out_->mutable_data<bool>(cpu)[0];
-    out_->mutable_data<bool>(cpu)[0] = lhs && rhs;
+    const int64_t num = in_.numel();
+    if (num <= 0) return;
+    // Look the buffers up once rather than on every iteration.
+    const bool* in_flags = in_.data<bool>();
+    bool* out_flags = out_->mutable_data<bool>(cpu);
+    for (int64_t i = 0; i < num; ++i) {
+      // out[i] becomes true only where both flags were false.
+      out_flags[i] = (!in_flags[i]) && (!out_flags[i]);
+    }
   }
 };
 
@@ -449,6 +530,15 @@ void TensorIsfinite(const framework::Tensor& tensor, framework::Tensor* out) {
   platform::VisitPlace(place, visitor);
 }
 
+// out = (!inf_mask) && (!nan_mask), combined in place by BothFalseVisitor.
+void TensorIsfiniteV2(const framework::Tensor& tensor, framework::Tensor* out) {
+  framework::Tensor inf_mask;
+  TensorContainsInfV2(tensor, &inf_mask);
+  TensorContainsNANV2(tensor, out);  // `out` first holds the NaN mask
+  BothFalseVisitor both_false(inf_mask, out);
+  platform::VisitPlace(tensor.place(), both_false);
+}
+
 void TensorToStream(std::ostream& os, const Tensor& tensor,
                     const platform::DeviceContext& dev_ctx) {
   {  // the 1st field, uint32_t version

diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h
@@ -76,6 +76,13 @@ void TensorFromStream(std::istream& is, Tensor* tensor,
                       const platform::DeviceContext& dev_ctx,
                       const size_t& seek, const std::vector<int64_t>& shape);
 
+// Elementwise checks: each writes a bool tensor of the input's shape to out.
+void TensorContainsNANV2(const framework::Tensor& tensor,
+                         framework::Tensor* out);
+void TensorContainsInfV2(const framework::Tensor& tensor,
+                         framework::Tensor* out);
+void TensorIsfiniteV2(const framework::Tensor& tensor, framework::Tensor* out);
+
 // convert dlpack's DLTensor to tensor
 void TensorFromDLPack(const ::DLTensor& dl_tensor, framework::Tensor* dst);
 

diff --git a/paddle/fluid/operators/isfinite_v2_op.cc b/paddle/fluid/operators/isfinite_v2_op.cc
@@ -0,0 +1,119 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/operators/isfinite_v2_op.h"
+#include <string>
+#include <vector>
+#include "paddle/fluid/operators/common_infer_shape_functions.h"
+#include "paddle/fluid/platform/float16.h"
+
+namespace plat = paddle::platform;
+
+namespace paddle {
+namespace operators {
+
+// Operator definition shared by isinf_v2, isnan_v2 and isfinite_v2.
+class OverflowV2Op : public framework::OperatorWithKernel {
+ public:
+  OverflowV2Op(const std::string &type,
+               const framework::VariableNameMap &inputs,
+               const framework::VariableNameMap &outputs,
+               const framework::AttributeMap &attrs)
+      : OperatorWithKernel(type, inputs, outputs, attrs) {}
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "isfinitev2");
+    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "isfinitev2");
+    UnaryOpUnchangedInferShape(ctx);
+  }
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    auto *input = ctx.InputVar("X");
+    const bool is_dense = input->IsType<framework::LoDTensor>();
+    if (!is_dense && !input->IsType<framework::SelectedRows>()) {
+      PADDLE_THROW(plat::errors::InvalidArgument(
+          "Cannot find the input data type by all input data"));
+    }
+    // Kernel dtype follows X: the tensor itself or a SelectedRows' value.
+    const int dtype =
+        is_dense ? input->Get<framework::LoDTensor>().type()
+                 : input->Get<framework::SelectedRows>().value().type();
+    return framework::OpKernelType(framework::proto::VarType::Type(dtype),
+                                   ctx.GetPlace());
+  }
+};
+
+// Base proto maker; Make() fills the doc's three %s with name, name, comments.
+class OverflowV2OpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X", "(Tensor) The input tensors of overflowv2 operator.");
+    AddOutput("Out",
+              "(Tensor) The output tensor of overflowv2 operator. "
+              "Same size compare to input tensor");
+    AddComment(string::Sprintf(R"DOC(
+Overflow %s operator.
+
+$$Out = %s(X)$$
+
+Check whether each element of X is Inf or Nan, return the bool result of each
+element of X as a tensor.
+
+%s
+)DOC",
+                               GetName(), GetName(), GetComments()));
+  }
+ protected:
+  virtual std::string GetName() const = 0;      // op type shown in the doc
+  virtual std::string GetComments() const = 0;  // trailing line of the doc
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+#define REGISTER_V2OP_MAKER(op_type, comment) /* maker + registration */ \
+  namespace paddle {                                                  \
+  namespace operators {                                               \
+  class _##op_type##OverflowV2OpMaker                                 \
+      : public ::paddle::operators::OverflowV2OpMaker {               \
+   protected:                                                         \
+    std::string GetName() const override { return #op_type; }         \
+    std::string GetComments() const override { return comment; }      \
+  };                                                                  \
+  }                                                                   \
+  }                                                                   \
+  REGISTER_OPERATOR(                                                  \
+      op_type, ops::OverflowV2Op, ops::_##op_type##OverflowV2OpMaker, \
+      paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, \
+      paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>)
+
+#define REGISTER_OVERFLOW_CPU_KERNEL(op_type, functor)                       \
+  REGISTER_OP_CPU_KERNEL( /* int, int64_t, float, double, float16 */        \
+      op_type, ops::OverflowKernel<paddle::platform::CPUDeviceContext, int,  \
+                                   ops::functor>,                            \
+      ops::OverflowKernel<paddle::platform::CPUDeviceContext, int64_t,       \
+                          ops::functor>,                                     \
+      ops::OverflowKernel<paddle::platform::CPUDeviceContext, float,         \
+                          ops::functor>,                                     \
+      ops::OverflowKernel<paddle::platform::CPUDeviceContext, double,        \
+                          ops::functor>,                                     \
+      ops::OverflowKernel<paddle::platform::CPUDeviceContext, plat::float16, \
+                          ops::functor>);
+
+REGISTER_V2OP_MAKER(isinf_v2, "isinfv2(X)");  // (op type, doc text)
+REGISTER_V2OP_MAKER(isnan_v2, "isnanv2(X)");
+REGISTER_V2OP_MAKER(isfinite_v2, "isfinitev2(X)");
+FOR_EACH_KERNEL_V2FUNCTOR(REGISTER_OVERFLOW_CPU_KERNEL);  // all three ops
diff --git a/paddle/fluid/operators/isfinite_v2_op.cu b/paddle/fluid/operators/isfinite_v2_op.cu
@@ -0,0 +1,34 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/operators/isfinite_v2_op.h"
+#include "paddle/fluid/platform/float16.h"
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+#define REGISTER_OVERFLOW_CUDA_KERNEL(op_type, functor)                       \
+  REGISTER_OP_CUDA_KERNEL( /* int, int64_t, float, double, float16 */        \
+      op_type, ops::OverflowKernel<paddle::platform::CUDADeviceContext, int,  \
+                                   ops::functor>,                             \
+      ops::OverflowKernel<paddle::platform::CUDADeviceContext, int64_t,       \
+                          ops::functor>,                                      \
+      ops::OverflowKernel<paddle::platform::CUDADeviceContext, float,         \
+                          ops::functor>,                                      \
+      ops::OverflowKernel<paddle::platform::CUDADeviceContext, double,        \
+                          ops::functor>,                                      \
+      ops::OverflowKernel<paddle::platform::CUDADeviceContext, plat::float16, \
+                          ops::functor>);
+
+FOR_EACH_KERNEL_V2FUNCTOR(REGISTER_OVERFLOW_CUDA_KERNEL);  // all three ops
diff --git a/paddle/fluid/operators/isfinite_v2_op.h b/paddle/fluid/operators/isfinite_v2_op.h
@@ -0,0 +1,52 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <vector>
+#include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/operators/isfinite_op.h"
+#include "paddle/fluid/platform/float16.h"
+#include "paddle/fluid/platform/transform.h"
+
+namespace paddle {
+namespace operators {
+
+struct InfinityV2Functor {  // OverflowKernel functor for the isinf_v2 op
+  void operator()(const framework::Tensor& tensor, framework::Tensor* out) {
+    framework::TensorContainsInfV2(tensor, out);
+  }
+};
+
+struct NANV2Functor {  // OverflowKernel functor for the isnan_v2 op
+  void operator()(const framework::Tensor& tensor, framework::Tensor* out) {
+    framework::TensorContainsNANV2(tensor, out);
+  }
+};
+
+struct IsfiniteV2Functor {  // OverflowKernel functor for the isfinite_v2 op
+  void operator()(const framework::Tensor& tensor, framework::Tensor* out) {
+    framework::TensorIsfiniteV2(tensor, out);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+#define FOR_EACH_KERNEL_V2FUNCTOR(__macro) /* (op type, functor) pairs */ \
+  __macro(isinf_v2, InfinityV2Functor);    \
+  __macro(isnan_v2, NANV2Functor);         \
+  __macro(isfinite_v2, IsfiniteV2Functor);
diff --git a/paddle/fluid/operators/nll_loss_op.cc b/paddle/fluid/operators/nll_loss_op.cc
@@ -55,8 +55,8 @@ class NLLLossOp : public framework::OperatorWithKernel {
                               "Input(Weight) should be a 1D tensor."));
         PADDLE_ENFORCE_EQ(x_dims[1], w_dims[0],
                           platform::errors::InvalidArgument(
-                              "Input(Weight) Tensor's size should match"
-                              "to the class numer."));
+                              "Input(Weight) Tensor's size should match "
+                              "the total number of classes."));
       }
     }
     if (x_dims.size() == 2) {