diff --git a/doc/sphinx/ngraph.doxyfile b/doc/sphinx/ngraph.doxyfile index 88da4a97846..ffea9942355 100644 --- a/doc/sphinx/ngraph.doxyfile +++ b/doc/sphinx/ngraph.doxyfile @@ -1807,7 +1807,7 @@ SEARCH_INCLUDES = YES # preprocessor. # This tag requires that the tag SEARCH_INCLUDES is set to YES. -INCLUDE_PATH = +INCLUDE_PATH = ../../src # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the diff --git a/doc/sphinx/source/ops/batch_norm.rst b/doc/sphinx/source/ops/batch_norm.rst deleted file mode 100644 index 53b117f389a..00000000000 --- a/doc/sphinx/source/ops/batch_norm.rst +++ /dev/null @@ -1,105 +0,0 @@ -.. batch_norm.rst: - -######### -BatchNorm -######### - -.. code-block:: cpp - - BatchNorm // Produces a normalized output - - -Description -=========== - -Produces a normalized output. - -Inputs ------- - -+---------------------+-------------------------+-----------------------------+ -| Name | Element Type | Shape | -+=====================+=========================+=============================+ -| ``input`` | same as ``gamma`` | \(..., C, ...\) | -+---------------------+-------------------------+-----------------------------+ -| ``gamma`` | any | \(C\) | -+---------------------+-------------------------+-----------------------------+ -| ``beta`` | same as ``gamma`` | \(C\) | -+---------------------+-------------------------+-----------------------------+ -| ``global_mean`` | same as ``gamma`` | \(C\) | -+---------------------+-------------------------+-----------------------------+ -| ``global_variance`` | same as ``gamma`` | \(C\) | -+---------------------+-------------------------+-----------------------------+ -| ``use_global`` | ``bool`` | \(\) | -+---------------------+-------------------------+-----------------------------+ - - -Attributes ----------- - -+------------------+--------------------+---------------------+ -| Name | Type | Notes | -+==================+====================+=====================+ -| ``epsilon`` | same as ``input`` | Bias for variance | -+------------------+--------------------+---------------------+ -| ``channel_axis`` | size_t | Channel axis | -+------------------+--------------------+---------------------+ - -Outputs -------- - -+---------------------+-------------------------+-----------------------------+ -| Name | Element Type | Shape | -+=====================+=========================+=============================+ -| ``normalized`` | same as ``gamma`` | same as ``input`` | -+---------------------+-------------------------+-----------------------------+ -| ``batch_mean`` | same as ``gamma`` | \(C\) | -+---------------------+-------------------------+-----------------------------+ -| ``batch_variance`` | same as ``gamma`` | \(C\) | -+---------------------+-------------------------+-----------------------------+ - -The ``batch_mean`` and ``batch_variance`` outputs are computed per-channel from -``input``. The values only need to be computed if ``use_global`` is ``false``, -or if they are used. - - -Mathematical Definition -======================= - -The axes of the input fall into two categories: positional and channel, with -channel being axis 1. For each position, there are :math:`C` channel values, -each normalized independently. - -Normalization of a channel sample is controlled by two values: - -* the mean :math:`\mu`, and -* the variance :math:`\sigma^2`; - -and by two scaling attributes: :math:`\gamma` and :math:`\beta`. 
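(Reviewer note: the per-channel formula this deleted page describes, :math:`y_c = \frac{x_c-\mu_c}{\sqrt{\sigma^2_c+\epsilon}}\gamma_c+\beta_c`, carries over unchanged into the replacement pages added below. A minimal, self-contained numeric sketch of that formula follows; every value in it is made up for illustration, and only the ``1e-3`` epsilon mirrors the tests later in this patch.)

.. code-block:: cpp

    // Illustrative check of the per-channel batch norm formula described above.
    // All values are invented for the example; eps mirrors the 1e-3 used in the tests below.
    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main()
    {
        const double eps = 1e-3;
        const double gamma = 1.75, beta = 0.25;                   // per-channel scale and shift
        const std::vector<double> channel = {0.5, 1.0, 1.5, 2.0}; // samples of one channel

        // Batch statistics, as BatchNormTraining computes them from the input.
        double mu = 0.0;
        for (double v : channel)
            mu += v;
        mu /= channel.size();

        double var = 0.0;
        for (double v : channel)
            var += (v - mu) * (v - mu);
        var /= channel.size();

        for (double v : channel)
        {
            double y = (v - mu) / std::sqrt(var + eps) * gamma + beta;
            std::printf("%g -> %g\n", v, y);
        }
        return 0;
    }

(With these numbers the channel is normalized to roughly zero mean and unit variance before the :math:`\gamma`/:math:`\beta` affine step, which is the point of the op.)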
- -The values for :math:`\mu` and :math:`\sigma^2` come either from computing the -mean and variance of ``input``, or from ``global_mean`` and ``global_variance``, -depending on the value of ``use_global``. - -.. math:: - - y_c = \frac{x_c-\mu_c}{\sqrt{\sigma^2_c+\epsilon}}\gamma_c+\beta_c - -The mean and variance can be arguments, or they may be computed for each channel -of ``input`` over the positional axes. When computed from ``input``, the mean -and variance per-channel are available as outputs. - - -C++ Interface -============== - -.. doxygenclass:: ngraph::op::BatchNormTraining - :project: ngraph - :members: - - -.. doxygenclass:: ngraph::op::BatchNormInference - :project: ngraph - :members: - - diff --git a/doc/sphinx/source/ops/batch_norm_inference.rst b/doc/sphinx/source/ops/batch_norm_inference.rst new file mode 100644 index 00000000000..9017ac19c20 --- /dev/null +++ b/doc/sphinx/source/ops/batch_norm_inference.rst @@ -0,0 +1,80 @@ +.. batch_norm_inference.rst: + +################## +BatchNormInference +################## + +.. code-block:: cpp + + BatchNormInference // Adjust input for mean and variance + + +Description +=========== + + + +Inputs +------ + ++---------------------+-------------------------+------------------------------+ +| Name | Element Type | Shape | ++=====================+=========================+==============================+ +| ``input`` | real | :math:`(\bullet, C, \ldots)` | ++---------------------+-------------------------+------------------------------+ +| ``gamma`` | same as ``input`` | :math:`(C)` | ++---------------------+-------------------------+------------------------------+ +| ``beta`` | same as ``input`` | :math:`(C)` | ++---------------------+-------------------------+------------------------------+ +| ``mean`` | same as ``input`` | :math:`(C)` | ++---------------------+-------------------------+------------------------------+ +| ``variances`` | same as ``input`` | :math:`(C)` | ++---------------------+-------------------------+------------------------------+ + + +Attributes +---------- + ++------------------+--------------------+--------------------------------------------------------+ +| Name | Type | Notes | ++==================+====================+========================================================+ +| ``epsilon`` | ``double`` | Small bias added to variance to avoid division by 0. | ++------------------+--------------------+--------------------------------------------------------+ + +Outputs +------- + ++---------------------+-------------------------+-----------------------------+ +| Name | Element Type | Shape | ++=====================+=========================+=============================+ +| ``normalized`` | same as ``gamma`` | Same as ``input`` | ++---------------------+-------------------------+-----------------------------+ + +Mathematical Definition +======================= + +The axes of the input fall into two categories: positional and channel, with +channel being axis 1. For each position, there are :math:`C` channel values, +each normalized independently. + +Normalization of a channel sample is controlled by two values: + +* the `mean` :math:`\mu`, and + +* the `variance` :math:`\sigma^2`; + +and by two scaling attributes: :math:`\gamma` and :math:`\beta`. + +.. math:: + + \mathtt{normalized}_{\bullet, c, \ldots} = \frac{\mathtt{input}_{\bullet, c, \ldots}-\mu_c}{\sqrt{\sigma^2_c+\epsilon}}\gamma_c+\beta_c + + +C++ Interface +============== + +.. 
doxygenclass:: ngraph::op::BatchNormInference + :project: ngraph + :members: + + diff --git a/doc/sphinx/source/ops/batch_norm_training.rst b/doc/sphinx/source/ops/batch_norm_training.rst new file mode 100644 index 00000000000..a458d54a061 --- /dev/null +++ b/doc/sphinx/source/ops/batch_norm_training.rst @@ -0,0 +1,89 @@ +.. batch_norm_training.rst: + +################# +BatchNormTraining +################# + +.. code-block:: cpp + + BatchNormTraining // Compute mean and variance from the input. + + +Description +=========== + + + +Inputs +------ + ++---------------------+-------------------------+------------------------------+ +| Name | Element Type | Shape | ++=====================+=========================+==============================+ +| ``input`` | real | :math:`(\bullet, C, \ldots)` | ++---------------------+-------------------------+------------------------------+ +| ``gamma`` | same as ``input`` | :math:`(C)` | ++---------------------+-------------------------+------------------------------+ +| ``beta`` | same as ``input`` | :math:`(C)` | ++---------------------+-------------------------+------------------------------+ + + +Attributes +---------- + ++------------------+--------------------+--------------------------------------------------------+ +| Name | Type | Notes | ++==================+====================+========================================================+ +| ``epsilon`` | ``double`` | Small bias added to variance to avoid division by 0. | ++------------------+--------------------+--------------------------------------------------------+ + +Outputs +------- + ++---------------------+-------------------------+-----------------------------+ +| Name | Element Type | Shape | ++=====================+=========================+=============================+ +| ``normalized`` | same as ``gamma`` | Same as ``input`` | ++---------------------+-------------------------+-----------------------------+ +| ``batch_mean`` | same as ``gamma`` | :math:`(C)` | ++---------------------+-------------------------+-----------------------------+ +| ``batch_variance`` | same as ``gamma`` | :math:`(C)` | ++---------------------+-------------------------+-----------------------------+ + +The ``batch_mean`` and ``batch_variance`` outputs are computed per-channel from +``input``. + + +Mathematical Definition +======================= + +The axes of the input fall into two categories: positional and channel, with +channel being axis 1. For each position, there are :math:`C` channel values, +each normalized independently. + +Normalization of a channel sample is controlled by two values: + +* the `batch_mean` :math:`\mu`, and + +* the `batch_variance` :math:`\sigma^2`; + +and by two scaling attributes: :math:`\gamma` and :math:`\beta`. + +The values for :math:`\mu` and :math:`\sigma^2` come from computing the +mean and variance of ``input``. + +.. math:: + + \mu_c &= \mathop{\mathbb{E}}\left(\mathtt{input}_{\bullet, c, \ldots}\right)\\ + \sigma^2_c &= \mathop{\mathtt{Var}}\left(\mathtt{input}_{\bullet, c, \ldots}\right)\\ + \mathtt{normalized}_{\bullet, c, \ldots} &= \frac{\mathtt{input}_{\bullet, c, \ldots}-\mu_c}{\sqrt{\sigma^2_c+\epsilon}}\gamma_c+\beta_c + + +C++ Interface +============== + +..
doxygenclass:: ngraph::op::BatchNormTraining + :project: ngraph + :members: + + diff --git a/doc/sphinx/source/ops/batch_norm_training_backprop.rst b/doc/sphinx/source/ops/batch_norm_training_backprop.rst new file mode 100644 index 00000000000..f759cc1fbf1 --- /dev/null +++ b/doc/sphinx/source/ops/batch_norm_training_backprop.rst @@ -0,0 +1,71 @@ +.. batch_norm_training_backprop.rst: + +######################### +BatchNormTrainingBackprop +######################### + +.. code-block:: cpp + + BatchNormTrainingBackprop // Compute mean and variance backprop from the input. + + +Description +=========== + + + +Inputs +------ + ++----------------------+-------------------------+------------------------------+ +| Name | Element Type | Shape | ++======================+=========================+==============================+ +| ``input`` | real | :math:`(\bullet, C, \ldots)` | ++----------------------+-------------------------+------------------------------+ +| ``gamma`` | same as ``input`` | :math:`(C)` | ++----------------------+-------------------------+------------------------------+ +| ``beta`` | same as ``input`` | :math:`(C)` | ++----------------------+-------------------------+------------------------------+ +| ``mean`` | same as ``input`` | :math:`(C)` | ++----------------------+-------------------------+------------------------------+ +| ``variance`` | same as ``input`` | :math:`(C)` | ++----------------------+-------------------------+------------------------------+ +| ``normalized_delta`` | same as ``input`` | same as ``input`` | ++----------------------+-------------------------+------------------------------+ + + +Attributes +---------- + ++------------------+--------------------+--------------------------------------------------------+ +| Name | Type | Notes | ++==================+====================+========================================================+ +| ``epsilon`` | ``double`` | Small bias added to variance to avoid division by 0. | ++------------------+--------------------+--------------------------------------------------------+ + +Outputs +------- + ++---------------------+-------------------------+-----------------------------+ +| Name | Element Type | Shape | ++=====================+=========================+=============================+ +| ``input_delta`` | same as ``input`` | Same as ``input`` | ++---------------------+-------------------------+-----------------------------+ +| ``gamma_delta`` | same as ``gamma`` | :math:`(C)` | ++---------------------+-------------------------+-----------------------------+ +| ``beta_delta`` | same as ``beta`` | :math:`(C)` | ++---------------------+-------------------------+-----------------------------+ + + +Mathematical Definition +======================= + + +C++ Interface +============== + +.. doxygenclass:: ngraph::op::BatchNormTrainingBackprop + :project: ngraph + :members: + + diff --git a/doc/sphinx/source/ops/index.rst b/doc/sphinx/source/ops/index.rst index 3b70d1e058f..8f0423dd082 100644 --- a/doc/sphinx/source/ops/index.rst +++ b/doc/sphinx/source/ops/index.rst @@ -56,7 +56,9 @@ Not currently a comprehensive list. * :doc:`atan` * :doc:`avg_pool` * :doc:`avg_pool_backprop` - * :doc:`batch_norm` + * :doc:`batch_norm_inference` + * :doc:`batch_norm_training` + * :doc:`batch_norm_training_backprop` * :doc:`broadcast` * :doc:`ceiling` * :doc:`concat` @@ -123,7 +125,9 @@ Not currently a comprehensive list. 
atan.rst avg_pool.rst avg_pool_backprop.rst - batch_norm.rst + batch_norm_inference.rst + batch_norm_training.rst + batch_norm_training_backprop.rst broadcast.rst ceiling.rst concat.rst diff --git a/python/ngraph/ops.py b/python/ngraph/ops.py index 0b6bb262c18..1cab4de7179 100644 --- a/python/ngraph/ops.py +++ b/python/ngraph/ops.py @@ -924,9 +924,9 @@ def batch_norm(eps, # type: float # type: (...) -> Node """Return batch normalization node.""" if mean is None and variance is None: - return BatchNormTraining(eps, gamma, beta, data) + return BatchNormTraining(data, gamma, beta, eps) else: - return BatchNormInference(eps, gamma, beta, data, mean, variance) + return BatchNormInference(data, gamma, beta, mean, variance, eps) @nameable_op diff --git a/python/pyngraph/ops/batch_norm.cpp b/python/pyngraph/ops/batch_norm.cpp index 11fb21c2e07..2e7e32d97bd 100644 --- a/python/pyngraph/ops/batch_norm.cpp +++ b/python/pyngraph/ops/batch_norm.cpp @@ -30,10 +30,10 @@ void regclass_pyngraph_op_BatchNormTraining(py::module m) batch_norm_training(m, "BatchNormTraining"); batch_norm_training.doc() = "ngraph.impl.op.BatchNormTraining wraps ngraph::op::BatchNormTraining"; - batch_norm_training.def(py::init&, const std::shared_ptr&, const std::shared_ptr&, - const std::shared_ptr&>()); + double>()); } void regclass_pyngraph_op_BatchNormInference(py::module m) @@ -45,12 +45,12 @@ void regclass_pyngraph_op_BatchNormInference(py::module m) batch_norm_inference.doc() = "ngraph.impl.op.BatchNormInference wraps ngraph::op::BatchNormInference"; - batch_norm_inference.def(py::init&, const std::shared_ptr&, const std::shared_ptr&, const std::shared_ptr&, const std::shared_ptr&, - const std::shared_ptr&>()); + double>()); } void regclass_pyngraph_op_BatchNormTrainingBackprop(py::module m) @@ -61,11 +61,11 @@ void regclass_pyngraph_op_BatchNormTrainingBackprop(py::module m) batch_norm_training_backprop(m, "BatchNormTrainingBackprop"); batch_norm_training_backprop.doc() = "ngraph.impl.op.BatchNormTrainingBackprop wraps ngraph::op::BatchNormTrainingBackprop"; - batch_norm_training_backprop.def(py::init&, const std::shared_ptr&, const std::shared_ptr&, const std::shared_ptr&, const std::shared_ptr&, const std::shared_ptr&, - const std::shared_ptr&>()); + double>()); } diff --git a/src/ngraph/descriptor/input.hpp b/src/ngraph/descriptor/input.hpp index e4ef4508778..74d96cd81ba 100644 --- a/src/ngraph/descriptor/input.hpp +++ b/src/ngraph/descriptor/input.hpp @@ -60,10 +60,10 @@ namespace ngraph void replace_output(Output& output); protected: - /// \return the tensor view for the connected output + /// \return the tensor for the connected output std::shared_ptr get_tensor_ptr() const; - /// \return the tensor view for the connected output + /// \return the tensor for the connected output std::shared_ptr get_tensor_ptr(); public: diff --git a/src/ngraph/descriptor/layout/tensor_layout.hpp b/src/ngraph/descriptor/layout/tensor_layout.hpp index 780a31d501c..5dc38fc5763 100644 --- a/src/ngraph/descriptor/layout/tensor_layout.hpp +++ b/src/ngraph/descriptor/layout/tensor_layout.hpp @@ -32,7 +32,7 @@ namespace ngraph { namespace layout { - /// \brief Interface for describing implementations of tensor views. + /// \brief Interface for describing implementations of tensors. /// /// Kernel selection will need to pay attention to the layout. class TensorLayout @@ -44,7 +44,7 @@ namespace ngraph public: virtual ~TensorLayout() {} - /// Extent of this view in buffer. + /// Extent of this tensor in buffer. 
/// /// When we support non-linear buffers, this will need to be something other than size_t. size_t get_size() const; diff --git a/src/ngraph/descriptor/output.hpp b/src/ngraph/descriptor/output.hpp index b145fb528bb..f251a0b84ed 100644 --- a/src/ngraph/descriptor/output.hpp +++ b/src/ngraph/descriptor/output.hpp @@ -39,7 +39,7 @@ namespace ngraph public: /// \param node Node that owns this output. /// \param index Position of the output tensor in all output tensors - /// \param tensor The view of this tensor; where the value will be written + /// \param tensor The tensor where the value will be written Output(Node* node, size_t index, const std::shared_ptr& tensor); std::shared_ptr get_node() const; diff --git a/src/ngraph/descriptor/tensor.hpp b/src/ngraph/descriptor/tensor.hpp index 3a5ee901e23..1bf57dcb561 100644 --- a/src/ngraph/descriptor/tensor.hpp +++ b/src/ngraph/descriptor/tensor.hpp @@ -35,7 +35,7 @@ namespace ngraph class TensorLayout; } - /// \brief Compile-time descriptor of a first-class value that is a view of a tensor. + /// \brief Compile-time descriptor of a first-class value that is a tensor. class Tensor { Tensor(const Tensor&) = delete; diff --git a/src/ngraph/frontend/onnx_import/op/batch_norm.cpp b/src/ngraph/frontend/onnx_import/op/batch_norm.cpp index 11338882026..0bf423bd9d0 100644 --- a/src/ngraph/frontend/onnx_import/op/batch_norm.cpp +++ b/src/ngraph/frontend/onnx_import/op/batch_norm.cpp @@ -54,11 +54,11 @@ namespace ngraph mean = inputs.at(3); var = inputs.at(4); return {std::make_shared( - epsilon, scale, bias, x, mean, var)}; + x, scale, bias, mean, var, epsilon)}; } return { - std::make_shared(epsilon, scale, bias, x)}; + std::make_shared(x, scale, bias, epsilon)}; } } // namespace set_1 diff --git a/src/ngraph/op/batch_norm.cpp b/src/ngraph/op/batch_norm.cpp index 251c86e418b..4f09e461bef 100644 --- a/src/ngraph/op/batch_norm.cpp +++ b/src/ngraph/op/batch_norm.cpp @@ -22,19 +22,17 @@ #include "ngraph/op/get_output_element.hpp" #include "ngraph/validation_util.hpp" -ngraph::op::BatchNormInference::BatchNormInference(double eps, - std::shared_ptr gamma, - std::shared_ptr beta, - std::shared_ptr input, - std::shared_ptr mean, - std::shared_ptr variance) - : Op("BatchNormInference", check_single_output_args({gamma, beta, input, mean, variance})) - , m_epsilon(eps) +ngraph::op::BatchNormTraining::BatchNormTraining(std::shared_ptr input, + std::shared_ptr gamma, + std::shared_ptr beta, + double epsilon) + : Op("BatchNormTraining", check_single_output_args({gamma, beta, input})) + , m_epsilon(epsilon) { - set_output_size(1); constructor_validate_and_infer_types(); } +// DEPRECATED ngraph::op::BatchNormTraining::BatchNormTraining(double eps, std::shared_ptr gamma, std::shared_ptr beta, @@ -42,50 +40,124 @@ ngraph::op::BatchNormTraining::BatchNormTraining(double eps, : Op("BatchNormTraining", check_single_output_args({gamma, beta, input})) , m_epsilon(eps) { - set_output_size(3); constructor_validate_and_infer_types(); } -void ngraph::op::BatchNormInference::validate_and_infer_types() +void ngraph::op::BatchNormTraining::validate_and_infer_types() { element::Type result_et; PartialShape result_batch_shape; - PartialShape result_channel_shape; // unused here + PartialShape result_channel_shape; + set_output_size(3); std::tie(result_et, result_batch_shape, result_channel_shape) = infer_batch_norm_forward(this, get_input_element_type(INPUT_DATA), get_input_element_type(INPUT_GAMMA), get_input_element_type(INPUT_BETA), - get_input_element_type(INPUT_MEAN), - 
get_input_element_type(INPUT_VARIANCE), get_input_partial_shape(INPUT_DATA), get_input_partial_shape(INPUT_GAMMA), - get_input_partial_shape(INPUT_BETA), - get_input_partial_shape(INPUT_MEAN), - get_input_partial_shape(INPUT_VARIANCE)); + get_input_partial_shape(INPUT_BETA)); set_output_type(0, result_et, result_batch_shape); + set_output_type(1, result_et, result_channel_shape); + set_output_type(2, result_et, result_channel_shape); } -void ngraph::op::BatchNormTraining::validate_and_infer_types() +std::shared_ptr + ngraph::op::BatchNormTraining::copy_with_new_args(const NodeVector& new_args) const +{ + check_new_args_count(this, new_args); + return std::make_shared( + new_args.at(2), new_args.at(0), new_args.at(1), m_epsilon); +} + +void ngraph::op::BatchNormTraining::generate_adjoints(autodiff::Adjoints& adjoints, + const NodeVector& deltas) +{ + auto gamma = get_argument(0); + auto beta = get_argument(1); + auto input = get_argument(2); + std::shared_ptr mean = nullptr; + std::shared_ptr var = nullptr; + + // Extract mean and variance outputs from BatchNormBase + // as these are used by BatchNormTrainingBackprop. + // The users of the outputs (GetOutputElements' Inputs) aren't sorted + // and get_n() is used to sort the inputs in the same order as Batchnorm's outputs + // Next, Mean and Variance (`at(1)` and `at(2)`) are extracted + // Please see `add_output` in `BatchNormBase::BatchNormBase` for more details + + auto goes = op::get_output_elements(shared_from_this()); + mean = goes.at(1); + var = goes.at(2); + if (!mean) + { + throw ngraph_error("GetOutputElement for mean is missing"); + } + + if (!var) + { + throw ngraph_error("GetOutputElement for variance is missing"); + } + + auto bbn = std::make_shared( + input, gamma, beta, mean, var, deltas.at(0), get_eps_value()); + auto dinput = std::make_shared(bbn, 0); + auto dgamma = std::make_shared(bbn, 1); + auto dbeta = std::make_shared(bbn, 2); + + adjoints.add_delta(input, dinput); + adjoints.add_delta(gamma, dgamma); + adjoints.add_delta(beta, dbeta); +} + +ngraph::op::BatchNormInference::BatchNormInference(std::shared_ptr input, + std::shared_ptr gamma, + std::shared_ptr beta, + std::shared_ptr mean, + std::shared_ptr variance, + double epsilon) + : Op("BatchNormInference", check_single_output_args({gamma, beta, input, mean, variance})) + , m_epsilon(epsilon) +{ + constructor_validate_and_infer_types(); +} + +// DEPRECATED +ngraph::op::BatchNormInference::BatchNormInference(double eps, + std::shared_ptr gamma, + std::shared_ptr beta, + std::shared_ptr input, + std::shared_ptr mean, + std::shared_ptr variance) + : Op("BatchNormInference", check_single_output_args({gamma, beta, input, mean, variance})) + , m_epsilon(eps) +{ + constructor_validate_and_infer_types(); +} + +void ngraph::op::BatchNormInference::validate_and_infer_types() { element::Type result_et; PartialShape result_batch_shape; - PartialShape result_channel_shape; + PartialShape result_channel_shape; // unused here + set_output_size(1); std::tie(result_et, result_batch_shape, result_channel_shape) = infer_batch_norm_forward(this, get_input_element_type(INPUT_DATA), get_input_element_type(INPUT_GAMMA), get_input_element_type(INPUT_BETA), + get_input_element_type(INPUT_MEAN), + get_input_element_type(INPUT_VARIANCE), get_input_partial_shape(INPUT_DATA), get_input_partial_shape(INPUT_GAMMA), - get_input_partial_shape(INPUT_BETA)); + get_input_partial_shape(INPUT_BETA), + get_input_partial_shape(INPUT_MEAN), + get_input_partial_shape(INPUT_VARIANCE)); set_output_type(0, 
result_et, result_batch_shape); - set_output_type(1, result_et, result_channel_shape); - set_output_type(2, result_et, result_channel_shape); } std::shared_ptr @@ -93,28 +165,20 @@ std::shared_ptr { check_new_args_count(this, new_args); return std::make_shared( - m_epsilon, new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), new_args.at(4)); -} - -std::shared_ptr - ngraph::op::BatchNormTraining::copy_with_new_args(const NodeVector& new_args) const -{ - check_new_args_count(this, new_args); - return std::make_shared( - m_epsilon, new_args.at(0), new_args.at(1), new_args.at(2)); + new_args.at(2), new_args.at(0), new_args.at(1), new_args.at(3), new_args.at(4), m_epsilon); } ngraph::op::BatchNormTrainingBackprop::BatchNormTrainingBackprop( - double eps, + std::shared_ptr input, std::shared_ptr gamma, std::shared_ptr beta, - std::shared_ptr input, std::shared_ptr mean, std::shared_ptr variance, - std::shared_ptr delta) + std::shared_ptr delta, + double epsilon) : Op("BatchNormTrainingBackprop", check_single_output_args({gamma, beta, input, mean, variance, delta})) - , m_epsilon(eps) + , m_epsilon(epsilon) { set_output_size(3); @@ -167,51 +231,11 @@ std::shared_ptr ngraph::op::BatchNormTrainingBackprop::copy_with_new_args(const NodeVector& new_args) const { check_new_args_count(this, new_args); - return std::make_shared(m_epsilon, + return std::make_shared(new_args.at(2), new_args.at(0), new_args.at(1), - new_args.at(2), new_args.at(3), new_args.at(4), - new_args.at(5)); -} - -void ngraph::op::BatchNormTraining::generate_adjoints(autodiff::Adjoints& adjoints, - const NodeVector& deltas) -{ - auto gamma = get_argument(0); - auto beta = get_argument(1); - auto input = get_argument(2); - std::shared_ptr mean = nullptr; - std::shared_ptr var = nullptr; - - // Extract mean and variance outputs from BatchNormBase - // as these are used by BatchNormTrainingBackprop. - // The users of the outputs (GetOutputElements' Inputs) aren't sorted - // and get_n() is used to sort the inputs in the same order as Batchnorm's outputs - // Next, Mean and Variance (`at(1)` and `at(2)`) are extracted - // Please see `add_output` in `BatchNormBase::BatchNormBase` for more details - - auto goes = op::get_output_elements(shared_from_this()); - mean = goes.at(1); - var = goes.at(2); - if (!mean) - { - throw ngraph_error("GetOutputElement for mean is missing"); - } - - if (!var) - { - throw ngraph_error("GetOutputElement for variance is missing"); - } - - auto bbn = std::make_shared( - get_eps_value(), gamma, beta, input, mean, var, deltas.at(0)); - auto dinput = std::make_shared(bbn, 0); - auto dgamma = std::make_shared(bbn, 1); - auto dbeta = std::make_shared(bbn, 2); - - adjoints.add_delta(input, dinput); - adjoints.add_delta(gamma, dgamma); - adjoints.add_delta(beta, dbeta); + new_args.at(5), + m_epsilon); } diff --git a/src/ngraph/op/batch_norm.hpp b/src/ngraph/op/batch_norm.hpp index b2f30a9bd6d..1069ca2d322 100644 --- a/src/ngraph/op/batch_norm.hpp +++ b/src/ngraph/op/batch_norm.hpp @@ -27,9 +27,20 @@ namespace ngraph { namespace op { + // \brief Batchnorm for training operation class BatchNormTraining : public Op { public: + // \param input Must have rank >= 2, [., C, ...] + // \param gamma gamma scaling for normalized value. 
[C] + // \param beta bias added to the scaled normalized value [C] + // \param epsilon Avoids division by 0 if input has 0 variance + BatchNormTraining(std::shared_ptr input, + std::shared_ptr gamma, + std::shared_ptr beta, + double epsilon); + + // DEPRECATED // In this version of BatchNorm: // // MEAN AND VARIANCE: computed directly from the content of 'input'. @@ -49,6 +60,7 @@ // output[0]: shall have the same shape as 'input'. // output[1]: shall have rank 1, with the same span as input's channel axis. // output[2]: shall have rank 1, with the same span as input's channel axis. + // DEPRECATED BatchNormTraining(double eps, std::shared_ptr gamma, std::shared_ptr beta, @@ -75,6 +87,20 @@ class BatchNormInference : public Op { public: + // \param input [., C, ...] + // \param gamma gamma scaling for normalized value. [C] + // \param beta bias added to the scaled normalized value [C] + // \param mean value for mean normalization [C] + // \param variance value for variance normalization [C] + // \param epsilon Avoids division by 0 if input has 0 variance + BatchNormInference(std::shared_ptr input, + std::shared_ptr gamma, + std::shared_ptr beta, + std::shared_ptr mean, + std::shared_ptr variance, + double epsilon); + + // DEPRECATED // In this version of BatchNorm: // // MEAN AND VARIANCE: provided by the 'mean' and 'variance' parameters. @@ -92,6 +118,7 @@ // mean: must have rank 1, with the same span as input's channel axis. // variance: must have rank 1, with the same span as input's channel axis. // output: shall have the same shape as 'input'. + // DEPRECATED BatchNormInference(double eps, std::shared_ptr gamma, std::shared_ptr beta, @@ -125,13 +152,13 @@ class BatchNormTrainingBackprop : public Op { public: - BatchNormTrainingBackprop(double eps, + BatchNormTrainingBackprop(std::shared_ptr input, std::shared_ptr gamma, std::shared_ptr beta, - std::shared_ptr input, std::shared_ptr mean, std::shared_ptr variance, - std::shared_ptr delta); + std::shared_ptr delta, + double epsilon); void validate_and_infer_types() override; diff --git a/src/ngraph/serializer.cpp b/src/ngraph/serializer.cpp index 4f1d611d15b..3e6e7eaf75b 100644 --- a/src/ngraph/serializer.cpp +++ b/src/ngraph/serializer.cpp @@ -531,21 +531,21 @@ static shared_ptr case OP_TYPEID::BatchNormTraining: { auto epsilon = node_js.at("eps").get(); - node = make_shared(epsilon, args[0], args[1], args[2]); + node = make_shared(args[2], args[0], args[1], epsilon); break; } case OP_TYPEID::BatchNormInference: { auto epsilon = node_js.at("eps").get(); node = make_shared( - epsilon, args[0], args[1], args[2], args[3], args[4]); + args[2], args[0], args[1], args[3], args[4], epsilon); break; } case OP_TYPEID::BatchNormTrainingBackprop: { auto epsilon = node_js.at("eps").get(); node = make_shared( - epsilon, args[0], args[1], args[2], args[3], args[4], args[5]); + args[2], args[0], args[1], args[3], args[4], args[5], epsilon); break; } case OP_TYPEID::Broadcast: diff --git a/test/autodiff.in.cpp b/test/autodiff.in.cpp index 457322c34c4..1aac9d4d8b4 100644 --- a/test/autodiff.in.cpp +++ b/test/autodiff.in.cpp @@ -1639,7 +1639,7 @@ NGRAPH_TEST(${BACKEND_NAME}, backwards_batch_norm_three_outputs) auto B = make_shared(element::f64, shape_mean); auto C = make_shared(element::f64, shape_mean); - auto BN = make_shared(1e-3, B, C, A); + auto BN = make_shared(A, B, C, 1e-3); // make sure we create GOEs for mean and variance needed for bprop goes.push_back(make_shared(BN, 1));
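(Reviewer note: the reordered constructor and the GetOutputElement pattern exercised by the surrounding test combine as sketched below. This is an illustration only: the header path, the shapes, and the helper name are assumptions, while the op names, the ``(input, gamma, beta, epsilon)`` argument order, and the three-output layout are taken from this patch.)

.. code-block:: cpp

    // Sketch: new argument order (input, gamma, beta, epsilon) plus extraction of the
    // three BatchNormTraining outputs via GetOutputElement, as in the tests in this patch.
    // "ngraph/ngraph.hpp" and the shapes below are assumed for the example.
    #include "ngraph/ngraph.hpp"

    using namespace ngraph;

    std::shared_ptr<Function> make_bn_training_graph()
    {
        Shape data_shape{4, 3, 2, 2}; // illustrative NCHW shape
        Shape chan_shape{3};

        auto input = std::make_shared<op::Parameter>(element::f32, data_shape);
        auto gamma = std::make_shared<op::Parameter>(element::f32, chan_shape);
        auto beta = std::make_shared<op::Parameter>(element::f32, chan_shape);

        // Data first, epsilon last -- the order introduced by this change.
        auto bn = std::make_shared<op::BatchNormTraining>(input, gamma, beta, 0.001);

        // Outputs: 0 = normalized, 1 = batch_mean, 2 = batch_variance.
        auto normalized = std::make_shared<op::GetOutputElement>(bn, 0);
        auto mean = std::make_shared<op::GetOutputElement>(bn, 1);
        auto variance = std::make_shared<op::GetOutputElement>(bn, 2);

        return std::make_shared<Function>(NodeVector{normalized, mean, variance},
                                          op::ParameterVector{input, gamma, beta});
    }

(The same 0/1/2 output indices are what ``generate_adjoints`` relies on when it wires up ``BatchNormTrainingBackprop``.)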
goes.push_back(make_shared(BN, 2)); diff --git a/test/backend_test.in.cpp b/test/backend_test.in.cpp index 167411cb7f9..fd92de668c2 100644 --- a/test/backend_test.in.cpp +++ b/test/backend_test.in.cpp @@ -299,7 +299,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batch_norm_one_output) auto Gamma = op::Constant::create(element::f64, shape_mean, {1.75437676, 0.37950502, 1.13727544}); - auto BN = make_shared(1e-3, Gamma, Beta, A, Mean, Variance); + auto BN = make_shared(A, Gamma, Beta, Mean, Variance, 1e-3); auto f = make_shared(BN, op::ParameterVector{A}); auto backend = runtime::Backend::create("${BACKEND_NAME}"); @@ -329,7 +329,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batch_norm_three_outputs) auto Gamma = op::Constant::create(element::f64, shape_mean, {1.75437676, 0.37950502, 1.13727544}); - auto BN = make_shared(1e-3, Gamma, Beta, A); + auto BN = make_shared(A, Gamma, Beta, 1e-3); auto f0 = make_shared(make_shared(BN, 0), op::ParameterVector{A}); @@ -4440,7 +4440,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_b1c2h2w2) auto beta = make_shared(element::f32, beta_shape); double eps = 0.001; auto shape_r = Shape{1, 2, 2, 2}; - auto bn = make_shared(eps, gamma, beta, input); + auto bn = make_shared(input, gamma, beta, eps); auto output_rt = std::make_shared(bn, 0); auto mean_rt = std::make_shared(bn, 1); @@ -4503,7 +4503,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_b2c2h2w1) auto beta = make_shared(element::f32, beta_shape); double eps = 0.001; auto shape_r = Shape{2, 2, 2, 1}; - auto bn = make_shared(eps, gamma, beta, input); + auto bn = make_shared(input, gamma, beta, eps); auto output_rt = std::make_shared(bn, 0); auto mean_rt = std::make_shared(bn, 1); @@ -4560,7 +4560,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_bprop_n4c3h2w2) auto beta = make_shared(element::f32, beta_shape); double eps = 0.001; auto shape_r = Shape{4, 3, 2, 2}; - auto bn = make_shared(eps, gamma, beta, input); + auto bn = make_shared(input, gamma, beta, eps); auto bn_dx = make_shared(bn, 0); auto bn_dgamma = make_shared(bn, 1); auto bn_dbeta = make_shared(bn, 2); @@ -4655,7 +4655,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_inference_b2c2h2w1) auto beta = make_shared(element::f32, beta_shape); double eps = 0.001; auto shape_r = Shape{2, 2, 2, 1}; - auto bn = make_shared(eps, gamma, beta, input, mean, var); + auto bn = make_shared(input, gamma, beta, mean, var, eps); auto f = make_shared(bn, op::ParameterVector{input, gamma, beta, mean, var}); auto backend = runtime::Backend::create("${BACKEND_NAME}"); @@ -4704,7 +4704,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_globalstats_b2c2w2h1) auto beta = make_shared(element::f32, beta_shape); double eps = 0.001; auto shape_r = Shape{2, 2, 2, 1}; - auto bn = make_shared(eps, gamma, beta, input, mean, var); + auto bn = make_shared(input, gamma, beta, mean, var, eps); auto f = make_shared(bn, op::ParameterVector{gamma, beta, input, mean, var}); auto backend = runtime::Backend::create("${BACKEND_NAME}"); @@ -5456,14 +5456,14 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_bprop) auto g = std::make_shared(element::f32, sca); auto b = std::make_shared(element::f32, sca); auto input = std::make_shared(element::f32, vec); - auto bn_fp = std::make_shared(eps, g, b, input); + auto bn_fp = std::make_shared(input, g, b, eps); auto bnorm = std::make_shared(bn_fp, 0); auto mean = std::make_shared(bn_fp, 1); auto var = std::make_shared(bn_fp, 2); auto delta = std::make_shared(element::f32, vec); auto bn_bp = - std::make_shared(eps, g, b, bnorm, mean, var, delta); + std::make_shared(bnorm, g, 
b, mean, var, delta, eps); auto dx = std::make_shared(bn_bp, 0); std::vector> args = { @@ -5487,7 +5487,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_bprop_2step) auto g = std::make_shared(element::f32, sca); auto b = std::make_shared(element::f32, sca); auto input = std::make_shared(element::f32, vec); - auto bn_fp = std::make_shared(eps, g, b, input); + auto bn_fp = std::make_shared(input, g, b, eps); auto bnorm = std::make_shared(bn_fp, 0); auto mean = std::make_shared(bn_fp, 1); auto var = std::make_shared(bn_fp, 2); @@ -5508,7 +5508,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_bprop_2step) auto m = std::make_shared(element::f32, sca); auto v = std::make_shared(element::f32, sca); auto delta = std::make_shared(element::f32, vec); - auto bn_bp = std::make_shared(eps, g, b, bn_output, m, v, delta); + auto bn_bp = std::make_shared(bn_output, g, b, m, v, delta, eps); auto dx = std::make_shared(bn_bp, 0); args.pop_back(); // remove x diff --git a/test/cpu_fusion.cpp b/test/cpu_fusion.cpp index cdfe7fd98bc..6bb5cc3fe0e 100644 --- a/test/cpu_fusion.cpp +++ b/test/cpu_fusion.cpp @@ -718,7 +718,7 @@ TEST(cpu_fusion, batchnorm_fprop_relu_b1c2h2w2) auto beta = make_shared(element::f32, beta_shape); double eps = 0.001; auto shape_r = Shape{1, 2, 2, 2}; - auto bn = make_shared(eps, gamma, beta, input); + auto bn = make_shared(input, gamma, beta, eps); auto output_rt = std::make_shared(bn, 0); // Note, op::Splice is used to break Relu(BatchNorm) fusion @@ -1082,8 +1082,8 @@ shared_ptr gen_groupconv_batchnorm(const bool add_goe, // Adding a goe will stop fusion since the patterns wont expect to see this op auto bn = - add_goe ? std::make_shared(eps, gamma, beta, goe_bn, mean, var) - : std::make_shared(eps, gamma, beta, group_conv, mean, var); + add_goe ? 
std::make_shared(goe_bn, gamma, beta, mean, var, eps) + : std::make_shared(group_conv, gamma, beta, mean, var, eps); if (with_relu) { auto prelu = std::make_shared(bn); @@ -1767,7 +1767,7 @@ TEST(cpu_fusion, conv_batch_norm_folding) auto mean = std::make_shared(element::f32, shape_norm); auto var = std::make_shared(element::f32, shape_norm); auto conv = std::make_shared(input, weights, Strides{1, 1}, Strides{1, 1}); - auto bn = std::make_shared(eps, gamma, beta, conv, mean, var); + auto bn = std::make_shared(conv, gamma, beta, mean, var, eps); auto f = make_shared(NodeVector{bn}, op::ParameterVector{input, weights, gamma, beta, mean, var}); return f; @@ -1829,7 +1829,7 @@ TEST(cpu_fusion, convbias_batch_norm_folding) auto conv = std::make_shared(input, weights, Strides{1, 1}, Strides{1, 1}); auto convbias = conv + std::make_shared(bias, conv->get_shape(), AxisSet{0, 2, 3}); - auto bn = std::make_shared(eps, gamma, beta, convbias, mean, var); + auto bn = std::make_shared(convbias, gamma, beta, mean, var, eps); auto f = make_shared( NodeVector{bn}, op::ParameterVector{input, weights, bias, gamma, beta, mean, var}); return f; diff --git a/test/type_prop.cpp b/test/type_prop.cpp index a2be02ab2ec..046b641f20a 100644 --- a/test/type_prop.cpp +++ b/test/type_prop.cpp @@ -208,7 +208,7 @@ TEST(type_prop, batchnorm_training_rank_less_than_2) auto dummy = make_shared(element::f32, Shape{1}); try { - auto bc = make_shared(0.001, dummy, dummy, dummy); + auto bc = make_shared(dummy, dummy, dummy, 0.001); FAIL() << "BatchNorm c-tor should throw for tensors whose rank is less than 2"; } catch (const NodeValidationError& error) @@ -229,7 +229,7 @@ TEST(type_prop, batchnorm_training_zero_channel_check) auto beta = make_shared(element::f32, Shape{0}); try { - auto bc = make_shared(0.001, gamma, beta, data_batch); + auto bc = make_shared(data_batch, gamma, beta, 0.001); FAIL() << "BatchNorm c-tor should throw for tensors w/ zero-dimension channels"; } catch (const NodeValidationError& error) @@ -250,7 +250,7 @@ TEST(type_prop, batchnorm_training_et_check) try { - auto bc = make_shared(0.001, gamma, beta, data_batch); + auto bc = make_shared(data_batch, gamma, beta, 0.001); FAIL() << "BatchNorm c-tor should throw for different element types"; } catch (const NodeValidationError& error) @@ -271,7 +271,7 @@ TEST(type_prop, batchnorm_training_shape_check) try { - auto bc = make_shared(0.001, gamma, beta, data_batch); + auto bc = make_shared(data_batch, gamma, beta, 0.001); FAIL() << "BatchNorm c-tor should throw if gamma and beta shapes don't match"; } catch (const NodeValidationError& error) @@ -296,7 +296,7 @@ TEST(type_prop, batchnorm_training_backprop_et_check) try { auto bc = make_shared( - 0.001, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, 0.001); FAIL() << "Deduced type should disagree with c-tor arguments"; } catch (const NodeValidationError& error) @@ -321,7 +321,7 @@ TEST(type_prop, batchnorm_training_backprop_shape_check) try { auto bc = make_shared( - 0.001, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, 0.001); FAIL() << "Deduced type should disagree with c-tor arguments"; } catch (const NodeValidationError& error) @@ -345,7 +345,7 @@ TEST(type_prop, batchnorm_training_backprop_delta_check) try { auto bc = make_shared( - 0.001, dummy, dummy, param, dummy, dummy, delta); + param, dummy, dummy, dummy, dummy, delta, 0.001); FAIL() << "Deduced type should disagree with c-tor arguments"; } catch 
(const NodeValidationError& error) @@ -379,7 +379,7 @@ TEST(type_prop, batchnorm_inference_partial_all_rank_dynamic) auto mean = make_shared(mean_et, mean_shape); auto variance = make_shared(variance_et, variance_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch, mean, variance); + auto bn = make_shared(data_batch, gamma, beta, mean, variance, epsilon); ASSERT_EQ(bn->get_output_size(), 1); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -407,7 +407,7 @@ TEST(type_prop, batchnorm_inference_partial_input_rank_static_dynamic_ok) auto mean = make_shared(mean_et, mean_shape); auto variance = make_shared(variance_et, variance_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch, mean, variance); + auto bn = make_shared(data_batch, gamma, beta, mean, variance, epsilon); ASSERT_EQ(bn->get_output_size(), 1); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -439,7 +439,7 @@ TEST(type_prop, batchnorm_inference_partial_input_rank_static_dynamic_zero_chann try { auto bn = - make_shared(epsilon, gamma, beta, data_batch, mean, variance); + make_shared(data_batch, gamma, beta, mean, variance, epsilon); FAIL() << "Zero channel count not detected"; } catch (const NodeValidationError& error) @@ -472,7 +472,7 @@ TEST(type_prop, batchnorm_inference_partial_input_rank_dynamic_some_rank_static_ auto mean = make_shared(mean_et, mean_shape); auto variance = make_shared(variance_et, variance_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch, mean, variance); + auto bn = make_shared(data_batch, gamma, beta, mean, variance, epsilon); ASSERT_EQ(bn->get_output_size(), 1); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -502,7 +502,7 @@ TEST(type_prop, batchnorm_inference_partial_input_rank_dynamic_some_rank_static_ try { auto bn = - make_shared(epsilon, gamma, beta, data_batch, mean, variance); + make_shared(data_batch, gamma, beta, mean, variance, epsilon); FAIL() << "Wrong gamma/beta/mean/variance shape not detected"; } catch (const NodeValidationError& error) @@ -541,7 +541,7 @@ TEST(type_prop, try { auto bn = - make_shared(epsilon, gamma, beta, data_batch, mean, variance); + make_shared(data_batch, gamma, beta, mean, variance, epsilon); FAIL() << "Inconsistent gamma/beta/mean/variance shape not detected"; } catch (const NodeValidationError& error) @@ -579,7 +579,7 @@ TEST(type_prop, try { auto bn = - make_shared(epsilon, gamma, beta, data_batch, mean, variance); + make_shared(data_batch, gamma, beta, mean, variance, epsilon); FAIL() << "Inconsistent gamma/beta/mean/variance channel count not detected"; } catch (const NodeValidationError& error) @@ -613,7 +613,7 @@ TEST(type_prop, batchnorm_inference_partial_input_rank_static_dynamic_some_stati auto mean = make_shared(mean_et, mean_shape); auto variance = make_shared(variance_et, variance_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch, mean, variance); + auto bn = make_shared(data_batch, gamma, beta, mean, variance, epsilon); ASSERT_EQ(bn->get_output_size(), 1); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -645,7 +645,7 @@ TEST(type_prop, try { auto bn = - make_shared(epsilon, gamma, beta, data_batch, mean, variance); + make_shared(data_batch, gamma, beta, mean, variance, epsilon); FAIL() << "Inconsistent input/gamma/beta/mean/variance channel count not detected"; } catch (const NodeValidationError& error) @@ -674,7 +674,7 @@ TEST(type_prop, batchnorm_training_partial_all_rank_dynamic) auto gamma = make_shared(gamma_et, gamma_shape); auto beta = 
make_shared(beta_et, beta_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -700,7 +700,7 @@ TEST(type_prop, batchnorm_training_partial_input_rank_static_dynamic_batch_size_ auto gamma = make_shared(gamma_et, gamma_shape); auto beta = make_shared(beta_et, beta_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -727,7 +727,7 @@ TEST(type_prop, batchnorm_training_partial_input_rank_static_dynamic_channel_cou auto gamma = make_shared(gamma_et, gamma_shape); auto beta = make_shared(beta_et, beta_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -755,7 +755,7 @@ TEST(type_prop, batchnorm_training_partial_input_rank_static_dynamic_zero_channe auto beta = make_shared(beta_et, beta_shape); try { - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); FAIL() << "Zero channel count not detected"; } catch (const NodeValidationError& error) @@ -782,7 +782,7 @@ TEST(type_prop, batchnorm_training_partial_input_rank_dynamic_some_rank_static_d auto gamma = make_shared(gamma_et, gamma_shape); auto beta = make_shared(beta_et, beta_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -809,7 +809,7 @@ TEST(type_prop, batchnorm_training_partial_input_rank_dynamic_some_rank_static_d try { - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); FAIL() << "Wrong gamma/beta shape not detected"; } catch (const NodeValidationError& error) @@ -840,7 +840,7 @@ TEST(type_prop, try { - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); FAIL() << "Inconsistent gamma/beta shape not detected"; } catch (const NodeValidationError& error) @@ -870,7 +870,7 @@ TEST(type_prop, try { - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); FAIL() << "Inconsistent gamma/beta channel count not detected"; } catch (const NodeValidationError& error) @@ -897,7 +897,7 @@ TEST(type_prop, batchnorm_training_partial_input_rank_static_dynamic_some_static auto gamma = make_shared(gamma_et, gamma_shape); auto beta = make_shared(beta_et, beta_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -926,7 +926,7 @@ TEST(type_prop, try { - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); FAIL() << "Inconsistent input/gamma/beta channel count not detected"; } catch (const NodeValidationError& error) @@ -970,7 +970,7 @@ TEST(type_prop, batchnorm_training_backprop_partial_all_rank_dynamic) auto delta = make_shared(delta_et, delta_shape); auto bn = make_shared( - epsilon, gamma, beta, 
data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -1006,7 +1006,7 @@ TEST(type_prop, batchnorm_training_backprop_partial_input_rank_static_dynamic_ok auto delta = make_shared(delta_et, delta_shape); auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -1045,7 +1045,7 @@ TEST(type_prop, batchnorm_training_backprop_partial_input_rank_static_dynamic_ze try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "Zero channel count not detected"; } catch (const NodeValidationError& error) @@ -1082,7 +1082,7 @@ TEST(type_prop, batchnorm_training_backprop_partial_delta_rank_static_dynamic_ok auto delta = make_shared(delta_et, delta_shape); auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -1118,7 +1118,7 @@ TEST(type_prop, batchnorm_training_backprop_partial_delta_rank_static_dynamic_ch auto delta = make_shared(delta_et, delta_shape); auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -1156,7 +1156,7 @@ TEST(type_prop, batchnorm_training_backprop_partial_delta_rank_static_dynamic_ze try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "Zero channel count not detected"; } catch (const NodeValidationError& error) @@ -1194,7 +1194,7 @@ TEST(type_prop, auto delta = make_shared(delta_et, delta_shape); auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -1233,7 +1233,7 @@ TEST( try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "Wrong gamma/beta/mean/variance shape not detected"; } catch (const NodeValidationError& error) @@ -1276,7 +1276,7 @@ TEST( try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "Wrong gamma/beta/mean/variance shape not detected"; } catch (const NodeValidationError& error) @@ -1318,7 +1318,7 @@ TEST( try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "nconsistent gamma/beta/mean/variance channel count not detected"; } catch (const NodeValidationError& error) @@ -1357,7 +1357,7 @@ TEST(type_prop, auto delta = make_shared(delta_et, delta_shape); auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); 
@@ -1396,7 +1396,7 @@ TEST( try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "Inconsistent delta/gamma/beta/mean/variance channel count not detected"; } catch (const NodeValidationError& error) @@ -1439,7 +1439,7 @@ TEST( try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "Inconsistent input/delta batch size not detected"; } catch (const NodeValidationError& error) @@ -1483,7 +1483,7 @@ TEST( try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "Inconsistent input/delta spatial dimensions not detected"; } catch (const NodeValidationError& error)
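(Reviewer note: every call-site change in this patch is the same mechanical move: the data tensor moves to the front and epsilon moves to the end. The sketch below summarizes the before/after orders for the three ops; the helper name and the caller-supplied node arguments are placeholders, while the constructor orders are the ones introduced here.)

.. code-block:: cpp

    // Summary of the argument-order migration in this patch: data tensor first, epsilon last.
    // The old (eps-first) orders are kept in comments; all nodes are supplied by the caller.
    #include "ngraph/ngraph.hpp"

    using namespace ngraph;

    NodeVector migrated_batch_norm_calls(std::shared_ptr<Node> input,
                                         std::shared_ptr<Node> gamma,
                                         std::shared_ptr<Node> beta,
                                         std::shared_ptr<Node> mean,
                                         std::shared_ptr<Node> variance,
                                         std::shared_ptr<Node> delta,
                                         double eps)
    {
        // before: BatchNormTraining(eps, gamma, beta, input)
        auto training = std::make_shared<op::BatchNormTraining>(input, gamma, beta, eps);

        // before: BatchNormInference(eps, gamma, beta, input, mean, variance)
        auto inference =
            std::make_shared<op::BatchNormInference>(input, gamma, beta, mean, variance, eps);

        // before: BatchNormTrainingBackprop(eps, gamma, beta, input, mean, variance, delta)
        auto backprop = std::make_shared<op::BatchNormTrainingBackprop>(
            input, gamma, beta, mean, variance, delta, eps);

        return NodeVector{training, inference, backprop};
    }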