Merged (changes shown from 28 commits; 40 commits total)

Commits:
7086e1c  "unified operators" (dzhwinter, Dec 15, 2017)
ab0cf15  Merge remote-tracking branch 'origin/develop' into feature/cudnn_kernel (dzhwinter, Dec 17, 2017)
c1964df  "add CUDNN register" (dzhwinter, Dec 18, 2017)
20828d9  merge develop branch (dzhwinter, Dec 25, 2017)
755936d  "add use cudnn attribute" (dzhwinter, Jan 5, 2018)
39f51e0  merge develop branch (dzhwinter, Jan 5, 2018)
75acd80  "add attribute" (dzhwinter, Jan 5, 2018)
f116609  "test conv tranpose op" (dzhwinter, Jan 5, 2018)
6ca8206  "remove duplicated attr" (dzhwinter, Jan 5, 2018)
21893df  "fix op test" (dzhwinter, Jan 5, 2018)
2787821  "add attribute to set cudnn" (dzhwinter, Jan 8, 2018)
b60a57d  "add more log" (dzhwinter, Jan 8, 2018)
6da6045  "need layout op register support" (dzhwinter, Jan 8, 2018)
56fb182  "add more log" (dzhwinter, Jan 8, 2018)
b81372c  merge develop branch (dzhwinter, Jan 8, 2018)
2c80230  merge develop branch (dzhwinter, Jan 9, 2018)
8b46297  "change GetExpectedKernelType" (dzhwinter, Jan 9, 2018)
b58555c  "fix Get attr in conv_op" (dzhwinter, Jan 9, 2018)
6d8af62  Merge remote-tracking branch 'origin/develop' into feature/cudnn_kernel (dzhwinter, Jan 10, 2018)
ccf39fe  "fix CI" (dzhwinter, Jan 10, 2018)
327d813  "fix tests" (dzhwinter, Jan 10, 2018)
f05fdfe  "removed kernel priority fallback" (dzhwinter, Jan 10, 2018)
17bd560  merge develop branch (dzhwinter, Jan 10, 2018)
0b7b8f5  "fix CI" (dzhwinter, Jan 10, 2018)
e6f6ec1  "fix stack pointer bug" (dzhwinter, Jan 10, 2018)
b285464  "refine buggy interface" (dzhwinter, Jan 10, 2018)
bf787d4  "add const cast to save life" (dzhwinter, Jan 10, 2018)
de74369  "fix get_output_with_grad" (dzhwinter, Jan 11, 2018)
c0b6f9e  "fix op test with dataformat" (dzhwinter, Jan 11, 2018)
ea850d3  merge develop (dzhwinter, Jan 11, 2018)
0bf18f8  ""fix pooling (dzhwinter, Jan 11, 2018)
c32fe6a  "fix pooling test" (dzhwinter, Jan 11, 2018)
84906e1  "fix CI" (dzhwinter, Jan 12, 2018)
59aa443  "fix with_gpu error" (dzhwinter, Jan 12, 2018)
212bf5f  "add transform needed functional check" (dzhwinter, Jan 12, 2018)
adb6714  "fix unpack list error" (dzhwinter, Jan 12, 2018)
caf3324  "comment out parallel.do temporary" (dzhwinter, Jan 12, 2018)
bfbbdf3  "fix CI" (dzhwinter, Jan 12, 2018)
f2312dd  "fix compile doc error" (dzhwinter, Jan 14, 2018)
d0b8b9b  "make threshold larger" (dzhwinter, Jan 14, 2018)
19 changes: 14 additions & 5 deletions paddle/framework/data_layout.h

@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
-#include <iostream>
+#include <cctype>
+#include <ostream>
 
 #include "paddle/platform/enforce.h"
 
 namespace paddle {
@@ -27,12 +29,19 @@ enum class DataLayout {
 };
 
 inline DataLayout StringToDataLayout(const std::string& str) {
-  if (str == "NHWC" || str == "nhwc") {
+  std::string s(str);
+  for (size_t i = 0; i < s.size(); ++i) {
+    s[i] = toupper(s[i]);
+  }
+
+  if (s == "NHWC") {
     return DataLayout::kNHWC;
-  } else if (str == "NCHW" || str == "nchw") {
+  } else if (s == "NCHW") {
     return DataLayout::kNCHW;
+  } else if (s == "ANYLAYOUT") {
+    return DataLayout::kAnyLayout;
   } else {
-    PADDLE_THROW("Unknown storage order string: %s", str);
+    PADDLE_THROW("Unknown storage order string: %s", s);
   }
 }
@@ -49,7 +58,7 @@ inline std::string DataLayoutToString(const DataLayout& data_layout) {
   }
 }
 
-inline std::ostream& operator<<(std::ostream& out, DataLayout l) {
+inline std::ostream& operator<<(std::ostream& out, const DataLayout& l) {
   out << DataLayoutToString(l);
   return out;
 }
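For reference, a minimal standalone sketch of the behavior this patch gives StringToDataLayout; the enum and error handling below are simplified stand-ins, not the Paddle sources:

// Standalone sketch (simplified stand-in for paddle/framework/data_layout.h):
// the parser now uppercases its input once, so any casing of NHWC/NCHW parses,
// and the new "ANYLAYOUT" spelling maps to kAnyLayout.
#include <cctype>
#include <iostream>
#include <stdexcept>
#include <string>

enum class DataLayout { kNHWC, kNCHW, kAnyLayout };

DataLayout StringToDataLayout(const std::string& str) {
  std::string s(str);
  for (size_t i = 0; i < s.size(); ++i) {
    s[i] = static_cast<char>(toupper(static_cast<unsigned char>(s[i])));
  }
  if (s == "NHWC") return DataLayout::kNHWC;
  if (s == "NCHW") return DataLayout::kNCHW;
  if (s == "ANYLAYOUT") return DataLayout::kAnyLayout;
  throw std::invalid_argument("Unknown storage order string: " + str);
}

int main() {
  // "nHwC" parses now; the old exact-match version accepted only NHWC/nhwc.
  std::cout << (StringToDataLayout("nHwC") == DataLayout::kNHWC) << "\n";
  return 0;
}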
13 changes: 8 additions & 5 deletions paddle/framework/data_transform.cc

@@ -19,16 +19,19 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-Tensor* DataTransform(const OpKernelType& expected_kernel_type,
-                      const OpKernelType& kernel_type_for_var,
-                      const Tensor& input_tensor) {
-  Tensor* out = nullptr;
+void DataTransform(const OpKernelType& expected_kernel_type,
+                   const OpKernelType& kernel_type_for_var,
+                   const Tensor& input_tensor, Tensor* out) {
+  if (expected_kernel_type.library_type_ != kernel_type_for_var.library_type_) {
+    Copy(input_tensor, expected_kernel_type.place_, out);
+    return;
+  }
+
   if (!platform::is_same_place(kernel_type_for_var.place_,
                                expected_kernel_type.place_)) {
     out = DeviceTransform(input_tensor, expected_kernel_type.place_);
   }
   PADDLE_ENFORCE_NOT_NULL(out, "out should not be null");
-  return out;
 }
 
 void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor,

Review discussion on this file:

On the library_type_ check:
  Member: out->set_library()?
  Author: No, library is a mark of a kernel. Tensor does not have that field.

On the Copy() call:
  Member (translated from Chinese): I am now wondering whether the library type
  needs to take part in the transform itself, or only in kernel selection.
  Author: It does. Only MKLDNN needs to do the transform.
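Caller-side, the new out-parameter signature is used the way the (currently commented-out) transform block in OperatorWithKernel::Run uses it below; a fragment lifted from that block, not runnable outside the Paddle tree:

    // The caller owns the temporary output tensor and passes it by pointer;
    // DataTransform fills it instead of returning a freshly allocated Tensor*.
    std::shared_ptr<Tensor> out(new Tensor);
    DataTransform(expected_kernel_key, kernel_type_for_var, *tensor_in,
                  out.get());
    CopyVariableWithTensor(*var, *(out.get()), *trans_var);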
6 changes: 3 additions & 3 deletions paddle/framework/data_transform.h

@@ -30,9 +30,9 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-Tensor* DataTransform(const OpKernelType& expected_kernel_type,
-                      const OpKernelType& kernel_type_for_var,
-                      const Tensor& input_tensor);
+void DataTransform(const OpKernelType& expected_kernel_type,
+                   const OpKernelType& kernel_type_for_var,
+                   const Tensor& input_tensor, Tensor* out);
 
 void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor,
                             Variable& out_var);
147 changes: 53 additions & 94 deletions paddle/framework/operator.cc

@@ -26,51 +26,6 @@ namespace framework {
 
 std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority;
 
-void UseCPU() {
-  kKernelPriority.clear();
-  /*Plain CPU*/
-  auto pair0 = std::make_tuple(platform::CPUPlace(), LibraryType::kPlain);
-  kKernelPriority.insert(kKernelPriority.begin(), pair0);
-}
-
-void UseMKLDNN() {
-  UseCPU();
-#if PADDLE_WITH_MKLML
-  {
-    /*MKLDNN Kernel*/
-    auto pair0 = std::make_tuple(platform::CPUPlace(), LibraryType::kMKLDNN);
-    kKernelPriority.insert(kKernelPriority.begin(), pair0);
-  }
-#endif
-}
-
-void UseCUDA() {
-  UseMKLDNN();
-#if PADDLE_WITH_CUDA
-  /*Plain GPU*/
-  auto pair0 = std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain);
-  kKernelPriority.insert(kKernelPriority.begin(), pair0);
-#endif
-}
-
-void UseCUDNN() {
-  UseCUDA();
-#if PADDLE_WITH_CUDA
-  if (platform::dynload::HasCUDNN()) {
-    /*CUDNN Kernel*/
-    auto pair0 = std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN);
-    kKernelPriority.insert(kKernelPriority.begin(), pair0);
-  }
-#endif
-}
-
-void UseALL() {
-  UseCPU();
-  UseMKLDNN();
-  UseCUDA();
-  UseCUDNN();
-}
-
 static DDim GetDims(const Scope& scope, const std::string& name) {
   Variable* var = scope.FindVar(name);
   if (var == nullptr) {
@@ -247,36 +202,33 @@ static bool VarIsTensor(const Variable* var) {
   return var->IsType<LoDTensor>() || var->IsType<SelectedRows>();
 }
 
-static const Tensor* GetTensorFromVar(const Variable* var) {
-  const Tensor* t = nullptr;
+static const Tensor* GetTensorFromVar(Variable* var) {
   if (var->IsType<LoDTensor>()) {
-    t = &(var->Get<LoDTensor>());
+    return var->GetMutable<LoDTensor>();
   } else if (var->IsType<SelectedRows>()) {
-    t = &(var->Get<SelectedRows>().value());
+    return var->GetMutable<SelectedRows>()->mutable_value();
   } else {
     PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
                  var->Type().name());
   }
-  return t;
 }
 
 static Tensor* GetMutableTensorFromVar(Variable* var) {
-  Tensor* t = nullptr;
   if (var->IsType<LoDTensor>()) {
-    t = var->GetMutable<LoDTensor>();
+    return var->GetMutable<LoDTensor>();
   } else if (var->IsType<SelectedRows>()) {
-    t = var->GetMutable<SelectedRows>()->mutable_value();
+    return var->GetMutable<SelectedRows>()->mutable_value();
   } else {
    PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
                 var->Type().name());
   }
-  return t;
 }
 
 template <>
 const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const {
   auto* var = InputVar(name);
-  return var == nullptr ? nullptr : GetTensorFromVar(var);
+  return var == nullptr ? nullptr
+                        : GetTensorFromVar(const_cast<Variable*>(var));
 }
 
 template <>
@@ -319,6 +271,7 @@ bool OpSupportGPU(const std::string& op_type) {
   auto it = all_kernels.find(op_type);
   if (it == all_kernels.end()) {
     // All control operator must support GPU
+
     return true;
   }
   for (auto& kern_pair : it->second) {
@@ -492,58 +445,64 @@ void OperatorWithKernel::Run(const Scope& scope,
   }
 
   ExecutionContext ctx(*this, scope, *dev_ctx);
-  auto expected_kernel_key = this->GetExpectedKernelType(ctx);
 
   OpKernelMap& kernels = kernels_iter->second;
 
-  for (auto& candidate : kKernelPriority) {
-    auto candidate_key =
-        OpKernelType(expected_kernel_key.data_type_, std::get<0>(candidate),
-                     expected_kernel_key.data_layout_, std::get<1>(candidate));
-
-    if ((candidate_key == expected_kernel_key) ||
-        (kernels.count(candidate_key))) {
-      expected_kernel_key = candidate_key;
-      break;
-    }
-  }
+  // TODO(dzhwinter) : kernel fallback mechanism will be added when all the
+  // transform functions are ready.
+
+  // for (auto& candidate : kKernelPriority) {
+  //   Do selection
+  // }
+
+  auto expected_kernel_key = this->GetExpectedKernelType(ctx);
 
   VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
 
   Scope& new_scope = scope.NewScope();
 
-  for (auto& var_name_item : this->Inputs()) {
-    for (auto& var_name : var_name_item.second) {
-      auto* var = scope.FindVar(var_name);
-      if (var && VarIsTensor(var)) {
-        auto* tensor_in = GetTensorFromVar(var);
-        if (tensor_in->IsInitialized()) {
-          auto kernel_type_for_var = this->GetKernelTypeForVar(
-              var_name_item.first, *tensor_in, expected_kernel_key);
-          if (kernel_type_for_var != expected_kernel_key) {
-            auto out_var_names = OutputVars(true);
-            if (std::find(out_var_names.begin(), out_var_names.end(),
-                          var_name) != out_var_names.end()) {
-              PADDLE_THROW(
-                  "var %s is both input and output, "
-                  "does not support transform",
-                  var_name);
-            }
-            VLOG(3) << "need to do transform for var " << var_name;
-            auto* trans_var = new_scope.Var(var_name);
-            auto* out = DataTransform(expected_kernel_key, kernel_type_for_var,
-                                      *tensor_in);
-            CopyVariableWithTensor(*var, *out, *trans_var);
-          }
-        }
-      }
-    }
-  }
+  if (expected_kernel_key.library_type_ != LibraryType::kCUDNN) {
+    // for (auto& var_name_item : this->Inputs()) {
+    //   for (auto& var_name : var_name_item.second) {
+    //     auto* var = scope.FindVar(var_name);
+    //     if (var && VarIsTensor(var)) {
+    //       auto* tensor_in = GetTensorFromVar(var);
+    //       if (tensor_in->IsInitialized()) {
+    //         auto kernel_type_for_var = this->GetKernelTypeForVar(
+    //             var_name_item.first, *tensor_in, expected_kernel_key);
+    //         if (kernel_type_for_var != expected_kernel_key) {
+    //           auto out_var_names = OutputVars(true);
+    //           if (std::find(out_var_names.begin(), out_var_names.end(),
+    //                         var_name) != out_var_names.end()) {
+    //             PADDLE_THROW(
+    //                 "var %s is both input and output, "
+    //                 "does not support transform",
+    //                 var_name);
+    //           }
+    //           VLOG(3) << "need to do transform for var " << var_name;
+    //           auto* trans_var = new_scope.Var(var_name);
+    //           std::shared_ptr<Tensor> out(new Tensor);
+    //           DataTransform(expected_kernel_key, kernel_type_for_var,
+    //                         *tensor_in, out.get());
+    //           CopyVariableWithTensor(*var, *(out.get()), *trans_var);
+    //         }
+    //       }
+    //     }
+    //   }
+    // }
+  }
 
   auto kernel_iter = kernels.find(expected_kernel_key);
 
-  kernel_iter->second->Compute(ExecutionContext(
-      *this, new_scope, *pool.Get(expected_kernel_key.place_)));
+  auto new_ctx =
+      ExecutionContext(*this, new_scope, *pool.Get(expected_kernel_key.place_));
+  VLOG(3) << "construct ExecutionContext ";
+  if (kernel_iter == kernels.end()) {
+    VLOG(3) << " Not such kernel";
+  }
+  auto& final_kernel = kernel_iter->second;
+  VLOG(3) << " before compute ";
+  final_kernel->Compute(new_ctx);
 }
 
 proto::DataType OperatorWithKernel::IndicateDataType(
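With the priority loop gone, kernel selection rests entirely on each op's GetExpectedKernelType override. Per the commit messages ("add use cudnn attribute", "change GetExpectedKernelType", "fix Get attr in conv_op"), a hedged sketch of what the conv op's override plausibly looks like in this PR; the attribute name use_cudnn matches the commits, but the body is a reconstruction, not copied from this diff:

// Sketch (reconstructed for illustration): pick the CUDNN library type from a
// per-op attribute, so kernels.find(expected_kernel_key) above lands on the
// kernel registered under LibraryType::kCUDNN.
framework::OpKernelType ConvOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  framework::LibraryType library = framework::LibraryType::kPlain;
  if (ctx.Attr<bool>("use_cudnn")) {
    library = framework::LibraryType::kCUDNN;
  }
  return framework::OpKernelType(
      framework::ToDataType(ctx.Input<Tensor>("Input")->type()),
      ctx.GetPlace(), framework::DataLayout::kAnyLayout, library);
}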
26 changes: 1 addition & 25 deletions paddle/framework/operator.h

@@ -54,33 +54,9 @@ constexpr char kGradVarSuffix[] = "@GRAD";
 constexpr char kZeroVarSuffix[] = "@ZERO";
 
 // define some kernel priority
+/* Define multiple kernel type fallback order*/
 extern std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority;
 
-/**
- * @brief Use cpu kernel only
- */
-void UseCPU();
-
-/**
- * @brief Perfer MKLDNN kernel than Plain CPU kernel
- */
-void UseMKLDNN();
-
-/**
- * @brief Perfer CUDA kernel than Plain CPU kernel
- */
-void UseCUDA();
-
-/**
- * @brief Perfer cudnn kernel than Plain CUDA kernel
- */
-void UseCUDNN();
-
-/**
- * @brief Use all available kernels
- */
-void UseALL();
-
 inline std::string GradVarName(const std::string& var_name) {
   return var_name + kGradVarSuffix;
 }
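kKernelPriority stays declared because the TODO in operator.cc plans to reinstate fallback once the transform functions are ready. The deleted loop above already shows the intended mechanism; restated on its own for clarity (names as in operator.cc, illustrative only):

// What the deferred fallback did: walk the (place, library) priority list and
// switch to the first candidate that either equals the expected key or has a
// registered kernel. This restates the loop deleted from
// OperatorWithKernel::Run.
for (auto& candidate : kKernelPriority) {
  auto candidate_key =
      OpKernelType(expected_kernel_key.data_type_, std::get<0>(candidate),
                   expected_kernel_key.data_layout_, std::get<1>(candidate));
  if (candidate_key == expected_kernel_key || kernels.count(candidate_key)) {
    expected_kernel_key = candidate_key;
    break;
  }
}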
13 changes: 10 additions & 3 deletions paddle/operators/CMakeLists.txt

@@ -136,8 +136,6 @@ op_library(sequence_softmax_op DEPS softmax)
 op_library(sum_op DEPS selected_rows_functor)
 op_library(sgd_op DEPS selected_rows_functor)
 op_library(adagrad_op DEPS selected_rows_functor)
-op_library(conv_op DEPS vol2col)
-op_library(pool_op DEPS pooling)
 op_library(maxout_op DEPS maxouting)
 op_library(unpool_op DEPS unpooling)
 op_library(pool_with_index_op DEPS pooling)
@@ -148,12 +146,21 @@ op_library(max_sequence_len_op DEPS lod_rank_table)
 op_library(sequence_conv_op DEPS context_project)
 op_library(sequence_pool_op DEPS sequence_pooling)
 op_library(lstm_op DEPS sequence2batch lstm_compute)
-op_library(conv_transpose_op DEPS vol2col)
 op_library(gru_op DEPS sequence2batch gru_compute)
 op_library(recurrent_op DEPS executor)
 op_library(warpctc_op DEPS dynload_warpctc sequence_padding math_function)
 op_library(cos_sim_op DEPS cos_sim_functor)
 op_library(parallel_do_op DEPS executor)
 
+# Regist multiple Kernel to pybind
+op_library(conv_op SRCS conv_op.cc conv_op.cu.cc conv_cudnn_op.cu.cc DEPS vol2col)
+op_library(pool_op SRCS pool_op.cc pool_op.cu.cc pool_cudnn_op.cu.cc DEPS pooling)
+op_library(conv_transpose_op SRCS conv_transpose_op.cc conv_transpose_op.cu.cc
+  conv_transpose_cudnn_op.cu.cc DEPS vol2col)
+file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(conv2d, CUDNN);\n")
+file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(pool2d, CUDNN);\n")
+file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(conv2d_transpose, CUDNN);\n")
+
 # FIXME(typhoonzero): save/load depends lodtensor serialization functions
 op_library(save_op DEPS lod_tensor)
 op_library(load_op DEPS lod_tensor)
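The file(APPEND ${pybind_file} ...) lines only work if the .cu.cc sources register kernels under the CUDNN library type. A hedged sketch of the pairing; the macro shapes follow Paddle's kernel registry of this period, and the exact kernel class names are assumptions:

// In conv_cudnn_op.cu.cc (sketch): register conv2d kernels under the CUDNN
// library type so GetExpectedKernelType can select them via the use_cudnn
// attribute.
REGISTER_OP_KERNEL(conv2d, CUDNN, paddle::platform::CUDAPlace,
                   paddle::operators::CUDNNConvOpKernel<float>,
                   paddle::operators::CUDNNConvOpKernel<double>);

// The appended pybind stub then forces the registration object to link:
USE_OP_DEVICE_KERNEL(conv2d, CUDNN);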