This repository was archived by the owner on Jan 3, 2023. It is now read-only.

Sarkars/batchnorm update #318

Open
wants to merge 4 commits into base: master
Changes from all commits
32 changes: 17 additions & 15 deletions src/ngraph_builder.cc
@@ -1717,34 +1717,36 @@ static Status TranslateFusedBatchNormOp(
std::shared_ptr<ng::Node> ng_batch_norm;

if (tf_is_training) {
ng_batch_norm = make_shared<ng::op::BatchNormTraining>(tf_epsilon, ng_scale,
ng_offset, ng_input);
ng_batch_norm = make_shared<ng::op::BatchNormTraining>(
ng_input, ng_scale, ng_offset, tf_epsilon);

shared_ptr<ngraph::Node> ng_y, ng_mean, ng_variance;
ng_y = make_shared<ng::op::GetOutputElement>(ng_batch_norm, 0);
ng_mean = make_shared<ng::op::GetOutputElement>(ng_batch_norm, 1);
ng_variance = make_shared<ng::op::GetOutputElement>(ng_batch_norm, 2);
shared_ptr<ngraph::Node> ng_y_out, ng_mean_out, ng_variance_out;
Contributor:

I must be misunderstanding the training op ordering. In ngraph, shouldn't the order be {gamma, beta, input}? Could you please explain this a little bit?

Contributor Author:

So this PR was meant to sync with this PR in ngraph, which reorders batch norm: NervanaSystems/ngraph#2046 (comment).

But apparently that PR has been closed and the reordering will come later, so I suppose we don't have to do anything now.
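For reference, below is a compact sketch of the training-branch translation pattern discussed in this thread, collapsed into a single helper. The constructor argument orders are taken from the diff in this PR (not from an authoritative nGraph API reference), and the include path and helper name are illustrative assumptions.

// Sketch of the training-branch translation as it stands in this PR; the
// ng::op argument orders are copied from the diff above.
#include <memory>
#include <vector>

#include <ngraph/ngraph.hpp>  // assumed include path

namespace ng = ngraph;

// Returns {y, batch_mean, batch_variance}. TF's FusedBatchNorm additionally
// exposes reserve_space_1/2, which this translation fills with the same mean
// and variance nodes (see the SaveNgOp calls in the diff).
std::vector<std::shared_ptr<ng::Node>> TranslateTrainingBranch(
    std::shared_ptr<ng::Node> ng_input, std::shared_ptr<ng::Node> ng_scale,
    std::shared_ptr<ng::Node> ng_offset, double tf_epsilon) {
  auto ng_batch_norm = std::make_shared<ng::op::BatchNormTraining>(
      ng_input, ng_scale, ng_offset, tf_epsilon);
  auto ng_y = std::make_shared<ng::op::GetOutputElement>(ng_batch_norm, 0);
  auto ng_mean = std::make_shared<ng::op::GetOutputElement>(ng_batch_norm, 1);
  auto ng_variance =
      std::make_shared<ng::op::GetOutputElement>(ng_batch_norm, 2);
  return {ng_y, ng_mean, ng_variance};
}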

ng_y_out = make_shared<ng::op::GetOutputElement>(ng_batch_norm, 0);
ng_mean_out = make_shared<ng::op::GetOutputElement>(ng_batch_norm, 1);
ng_variance_out = make_shared<ng::op::GetOutputElement>(ng_batch_norm, 2);

BatchToTensorflow(is_nhwc, ng_y);
BatchToTensorflow(is_nhwc, ng_y_out);

SaveNgOp(ng_op_map, op->name(), ng_y);
SaveNgOp(ng_op_map, op->name(), ng_mean);
SaveNgOp(ng_op_map, op->name(), ng_variance);
SaveNgOp(ng_op_map, op->name(), ng_y_out);
SaveNgOp(ng_op_map, op->name(), ng_mean_out);
SaveNgOp(ng_op_map, op->name(), ng_variance_out);
// Output reserve_space_1: A 1D Tensor for the computed batch mean, to be
// reused in the gradient computation.
SaveNgOp(ng_op_map, op->name(), ng_mean);
SaveNgOp(ng_op_map, op->name(), ng_mean_out);
// Output reserve_space_2: A 1D Tensor for the computed batch variance
//(inverted variance in the cuDNN case), to be reused in the gradient
// computation.
SaveNgOp(ng_op_map, op->name(), ng_variance);
SaveNgOp(ng_op_map, op->name(), ng_variance_out);
} else {
ng_batch_norm = make_shared<ng::op::BatchNormInference>(
tf_epsilon, ng_scale, ng_offset, ng_input, ng_mean, ng_variance);
ng_input, ng_scale, ng_offset, ng_mean, ng_variance, tf_epsilon);
BatchToTensorflow(is_nhwc, ng_batch_norm);
SaveNgOp(ng_op_map, op->name(), ng_batch_norm);
// When train=false, only one output is expected
// Note here: EXPECT_EQ(1, tensors_expected.size());
// https://github.com/tensorflow/tensorflow/blob/a767a02ca976d00b9e8e06042bdc2a2bb33b00eb/tensorflow/core/grappler/optimizers/remapper_test.cc#L47
}

SaveNgOp(ng_op_map, op->name(), ng_batch_norm);
return Status::OK();
}

@@ -1807,7 +1809,7 @@ static Status TranslateFusedBatchNormGradOp(
std::shared_ptr<ng::Node> ng_batch_norm_backprop;

ng_batch_norm_backprop = make_shared<ng::op::BatchNormTrainingBackprop>(
tf_epsilon, ng_scale, ng_beta, ng_input, ng_mean, ng_variance, ng_delta);
ng_input, ng_scale, ng_beta, ng_mean, ng_variance, ng_delta, tf_epsilon);

shared_ptr<ngraph::Node> ng_input_delta_op =
make_shared<ng::op::GetOutputElement>(ng_batch_norm_backprop, 0);
75 changes: 75 additions & 0 deletions test/test_nn_ops.cpp
@@ -912,6 +912,81 @@ TEST(NNOps, Conv2DBackpropInputNHWCWithDilation) {
}
} // end of op Conv2DBackpropInputNHWCWithDilation

// FusedBatchNorm : Forward pass, training = true
// TODO fix this test
TEST(NNOps, DISABLED_FusedBatchNormNHWCTrainTrue) {
Contributor Author:

This test does not pass. Sample output:

[ RUN      ] NNOps.DISABLE_FusedBatchNormNHWCTrainTrue
2018-11-19 01:03:39.177831: I tensorflow/core/common_runtime/process_util.cc:69] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
/localdisk/sarkars/workspace1/tf_ngtf_7_mkl_1_12/ngraph-tf/test/test_utilities.h:126: Failure  
Value of: rt
  Actual: false
Expected: true
 TF output 20.955995559692383
 NG output 20.606725692749023
/localdisk/sarkars/workspace1/tf_ngtf_7_mkl_1_12/ngraph-tf/test/test_utilities.h:126: Failure  
Value of: rt
  Actual: false
Expected: true
 TF output 21.971120834350586
 NG output 21.604936599731445
[  FAILED  ] NNOps.DISABLE_FusedBatchNormNHWCTrainTrue (125 ms)
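For context, the two mismatched elements above each differ from the TF reference by roughly 1.7% in relative terms. Below is a minimal standalone sketch (standard library only; shapes and values are illustrative, not taken from this test) of the per-channel computation FusedBatchNorm performs in training mode for NHWC data, y = scale * (x - batch_mean) / sqrt(batch_variance + epsilon) + offset:

// Reference computation of batch normalization in training mode, NHWC layout.
// Shapes and values are illustrative; this is a sketch, not the test's data.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const int N = 2, H = 2, W = 2, C = 2;  // tiny NHWC tensor
  const float epsilon = 0.0001f;
  std::vector<float> x(N * H * W * C);
  std::vector<float> scale = {2.0f, 0.5f};
  std::vector<float> offset = {0.1f, -0.3f};
  for (size_t i = 0; i < x.size(); ++i) x[i] = 0.25f * static_cast<float>(i);

  // Per-channel batch mean and (biased) variance, reduced over N, H and W.
  // For NHWC, the channel of flattened index i is simply i % C.
  const float reduce = static_cast<float>(N * H * W);
  std::vector<float> mean(C, 0.0f), var(C, 0.0f);
  for (size_t i = 0; i < x.size(); ++i) mean[i % C] += x[i] / reduce;
  for (size_t i = 0; i < x.size(); ++i) {
    const float d = x[i] - mean[i % C];
    var[i % C] += d * d / reduce;
  }

  // y = scale * (x - mean) / sqrt(var + epsilon) + offset, per channel.
  std::vector<float> y(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    const size_t c = i % C;
    y[i] =
        scale[c] * (x[i] - mean[c]) / std::sqrt(var[c] + epsilon) + offset[c];
  }
  std::printf("mean[0]=%f var[0]=%f y[0]=%f\n", mean[0], var[0], y[0]);
  return 0;
}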

Scope root = Scope::NewRootScope();

// 4D input tensor
Tensor x(DT_FLOAT, TensorShape({5, 4, 3, 2}));
// 1D tensor for scaling the normalized x
Tensor scale(DT_FLOAT, TensorShape({2}));
// 1D tensor for the offset added to the normalized x
Tensor offset(DT_FLOAT, TensorShape({2}));
// 1D (empty) tensor for population mean
Tensor mean(DT_FLOAT, TensorShape({0}));
// 1D (empty) tensor for population variance
Tensor variance(DT_FLOAT, TensorShape({0}));

AssignInputValuesRandom<float>(x, -10.0f, 5.0f);
AssignInputValuesRandom<float>(scale, -10.0f, 5.0f);
AssignInputValuesRandom<float>(offset, -2.0f, 2.0f);
// We do not fill mean and variance,
// since they are not used when training=true

auto attrs = ops::FusedBatchNorm::Attrs();
attrs.is_training_ = true;
attrs.epsilon_ = 0.0001f;
attrs.data_format_ = "NHWC";

// The test fetches all five outputs of the FusedBatchNorm op
vector<int> static_input_indexes = {};
vector<DataType> output_datatypes(5, DT_FLOAT);
auto R = ops::FusedBatchNorm(root, x, scale, offset, mean, variance, attrs);
std::vector<Output> sess_run_fetchoutputs = {
R.y, R.batch_mean, R.batch_variance, R.reserve_space_1,
R.reserve_space_2};
OpExecuter opexecuter(root, "FusedBatchNorm", static_input_indexes,
output_datatypes, sess_run_fetchoutputs);
opexecuter.RunTest();
}

// FusedBatchNorm : Forward pass, training = false
TEST(NNOps, FusedBatchNormNHWCTrainFalse) {
Scope root = Scope::NewRootScope();

// 4D input tensor
Tensor x(DT_FLOAT, TensorShape({5, 4, 3, 2}));
// 1D tensor for scaling the normalized x
Tensor scale(DT_FLOAT, TensorShape({2}));
// 1D tensor for the offset added to the normalized x
Tensor offset(DT_FLOAT, TensorShape({2}));
// 1D tensor for population mean
Tensor mean(DT_FLOAT, TensorShape({2}));
// 1D tensor for population variance
Tensor variance(DT_FLOAT, TensorShape({2}));

AssignInputValuesRandom<float>(x, -5.0f, 10.0f);
AssignInputValuesRandom<float>(scale, -10.0f, 10.0f);
AssignInputValuesRandom<float>(offset, -1.6f, 1.6f);
AssignInputValuesRandom<float>(mean, 1.1f, 1.5f);
AssignInputValuesRandom<float>(variance, 0.5f, 1.5f);

auto attrs = ops::FusedBatchNorm::Attrs();
attrs.is_training_ = false;
attrs.epsilon_ = 0.0001f;
attrs.data_format_ = "NHWC";

// When training=false, only the y output is fetched
vector<int> static_input_indexes = {};
vector<DataType> output_datatypes = {DT_FLOAT};
auto R = ops::FusedBatchNorm(root, x, scale, offset, mean, variance, attrs);
std::vector<Output> sess_run_fetchoutputs = {R.y};
OpExecuter opexecuter(root, "FusedBatchNorm", static_input_indexes,
output_datatypes, sess_run_fetchoutputs);
opexecuter.RunTest();
}

// FusedBatchNormGrad : Gradient for batch normalization
// On TF CPU: only supports NHWC
TEST(NNOps, FusedBatchNormGradNHWC) {