From 361fb42b9ad571bd3e9cfbed0d1dda5d75c8b6a1 Mon Sep 17 00:00:00 2001 From: Scott Cyphers Date: Wed, 31 Oct 2018 08:33:27 -0700 Subject: [PATCH 01/10] Doc for Batchnorm, fix argument order, cleanup some comments --- doc/sphinx/ngraph.doxyfile | 2 +- doc/sphinx/source/ops/batch_norm.rst | 105 - .../source/ops/batch_norm_inference.rst | 80 + doc/sphinx/source/ops/batch_norm_training.rst | 89 + .../ops/batch_norm_training_backprop.rst | 71 + doc/sphinx/source/ops/index.rst | 8 +- src/ngraph/descriptor/input.hpp | 4 +- .../descriptor/layout/tensor_layout.hpp | 4 +- src/ngraph/descriptor/output.hpp | 2 +- src/ngraph/descriptor/tensor.hpp | 2 +- src/ngraph/op/batch_norm.cpp | 180 +- src/ngraph/op/batch_norm.hpp | 33 +- src/ngraph/serializer.cpp | 6 +- test/autodiff.in.cpp | 2 +- test/backend_test.in.cpp | 22 +- test/backend_test.in.cpp-9bfce850 | 5571 +++++++++++++++++ test/cpu_fusion.cpp | 10 +- test/cpu_fusion.cpp-41c1ba06 | 3132 +++++++++ test/type_prop.cpp | 80 +- 19 files changed, 9148 insertions(+), 255 deletions(-) delete mode 100644 doc/sphinx/source/ops/batch_norm.rst create mode 100644 doc/sphinx/source/ops/batch_norm_inference.rst create mode 100644 doc/sphinx/source/ops/batch_norm_training.rst create mode 100644 doc/sphinx/source/ops/batch_norm_training_backprop.rst create mode 100644 test/backend_test.in.cpp-9bfce850 create mode 100644 test/cpu_fusion.cpp-41c1ba06 diff --git a/doc/sphinx/ngraph.doxyfile b/doc/sphinx/ngraph.doxyfile index 88da4a97846..ffea9942355 100644 --- a/doc/sphinx/ngraph.doxyfile +++ b/doc/sphinx/ngraph.doxyfile @@ -1807,7 +1807,7 @@ SEARCH_INCLUDES = YES # preprocessor. # This tag requires that the tag SEARCH_INCLUDES is set to YES. 
-INCLUDE_PATH = +INCLUDE_PATH = ../../src # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the diff --git a/doc/sphinx/source/ops/batch_norm.rst b/doc/sphinx/source/ops/batch_norm.rst deleted file mode 100644 index 53b117f389a..00000000000 --- a/doc/sphinx/source/ops/batch_norm.rst +++ /dev/null @@ -1,105 +0,0 @@ -.. batch_norm.rst: - -######### -BatchNorm -######### - -.. code-block:: cpp - - BatchNorm // Produces a normalized output - - -Description -=========== - -Produces a normalized output. - -Inputs ------- - -+---------------------+-------------------------+-----------------------------+ -| Name | Element Type | Shape | -+=====================+=========================+=============================+ -| ``input`` | same as ``gamma`` | \(..., C, ...\) | -+---------------------+-------------------------+-----------------------------+ -| ``gamma`` | any | \(C\) | -+---------------------+-------------------------+-----------------------------+ -| ``beta`` | same as ``gamma`` | \(C\) | -+---------------------+-------------------------+-----------------------------+ -| ``global_mean`` | same as ``gamma`` | \(C\) | -+---------------------+-------------------------+-----------------------------+ -| ``global_variance`` | same as ``gamma`` | \(C\) | -+---------------------+-------------------------+-----------------------------+ -| ``use_global`` | ``bool`` | \(\) | -+---------------------+-------------------------+-----------------------------+ - - -Attributes ----------- - -+------------------+--------------------+---------------------+ -| Name | Type | Notes | -+==================+====================+=====================+ -| ``epsilon`` | same as ``input`` | Bias for variance | -+------------------+--------------------+---------------------+ -| ``channel_axis`` | size_t | Channel axis | -+------------------+--------------------+---------------------+ - -Outputs -------- - 
-+---------------------+-------------------------+-----------------------------+ -| Name | Element Type | Shape | -+=====================+=========================+=============================+ -| ``normalized`` | same as ``gamma`` | same as ``input`` | -+---------------------+-------------------------+-----------------------------+ -| ``batch_mean`` | same as ``gamma`` | \(C\) | -+---------------------+-------------------------+-----------------------------+ -| ``batch_variance`` | same as ``gamma`` | \(C\) | -+---------------------+-------------------------+-----------------------------+ - -The ``batch_mean`` and ``batch_variance`` outputs are computed per-channel from -``input``. The values only need to be computed if ``use_global`` is ``false``, -or if they are used. - - -Mathematical Definition -======================= - -The axes of the input fall into two categories: positional and channel, with -channel being axis 1. For each position, there are :math:`C` channel values, -each normalized independently. - -Normalization of a channel sample is controlled by two values: - -* the mean :math:`\mu`, and -* the variance :math:`\sigma^2`; - -and by two scaling attributes: :math:`\gamma` and :math:`\beta`. - -The values for :math:`\mu` and :math:`\sigma^2` come either from computing the -mean and variance of ``input``, or from ``global_mean`` and ``global_variance``, -depending on the value of ``use_global``. - -.. math:: - - y_c = \frac{x_c-\mu_c}{\sqrt{\sigma^2_c+\epsilon}}\gamma_c+\beta_c - -The mean and variance can be arguments, or they may be computed for each channel -of ``input`` over the positional axes. When computed from ``input``, the mean -and variance per-channel are available as outputs. - - -C++ Interface -============== - -.. doxygenclass:: ngraph::op::BatchNormTraining - :project: ngraph - :members: - - -.. 
doxygenclass:: ngraph::op::BatchNormInference - :project: ngraph - :members: - - diff --git a/doc/sphinx/source/ops/batch_norm_inference.rst b/doc/sphinx/source/ops/batch_norm_inference.rst new file mode 100644 index 00000000000..9017ac19c20 --- /dev/null +++ b/doc/sphinx/source/ops/batch_norm_inference.rst @@ -0,0 +1,80 @@ +.. batch_norm_inference.rst: + +################## +BatchNormInference +################## + +.. code-block:: cpp + + BatchNormInference // Adjust input for mean and variance + + +Description +=========== + + + +Inputs +------ + ++---------------------+-------------------------+------------------------------+ +| Name | Element Type | Shape | ++=====================+=========================+==============================+ +| ``input`` | real | :math:`(\bullet, C, \ldots)` | ++---------------------+-------------------------+------------------------------+ +| ``gamma`` | same as ``input`` | :math:`(C)` | ++---------------------+-------------------------+------------------------------+ +| ``beta`` | same as ``input`` | :math:`(C)` | ++---------------------+-------------------------+------------------------------+ +| ``mean`` | same as ``input`` | :math:`(C)` | ++---------------------+-------------------------+------------------------------+ +| ``variance`` | same as ``input`` | :math:`(C)` | ++---------------------+-------------------------+------------------------------+ + + +Attributes +---------- + ++------------------+--------------------+--------------------------------------------------------+ +| Name | Type | Notes | ++==================+====================+========================================================+ +| ``epsilon`` | ``double`` | Small bias added to variance to avoid division by 0. 
| ++------------------+--------------------+--------------------------------------------------------+ + +Outputs +------- + ++---------------------+-------------------------+-----------------------------+ +| Name | Element Type | Shape | ++=====================+=========================+=============================+ +| ``normalized`` | same as ``gamma`` | Same as ``input`` | ++---------------------+-------------------------+-----------------------------+ + +Mathematical Definition +======================= + +The axes of the input fall into two categories: positional and channel, with +channel being axis 1. For each position, there are :math:`C` channel values, +each normalized independently. + +Normalization of a channel sample is controlled by two values: + +* the `mean` :math:`\mu`, and + +* the `variance` :math:`\sigma^2`; + +and by two scaling attributes: :math:`\gamma` and :math:`\beta`. + +.. math:: + + \mathtt{normalized}_{\bullet, c, \ldots} = \frac{\mathtt{input}_{\bullet, c, \ldots}-\mu_c}{\sqrt{\sigma^2_c+\epsilon}}\gamma_c+\beta_c + + +C++ Interface +============== + +.. doxygenclass:: ngraph::op::BatchNormInference + :project: ngraph + :members: + + diff --git a/doc/sphinx/source/ops/batch_norm_training.rst b/doc/sphinx/source/ops/batch_norm_training.rst new file mode 100644 index 00000000000..a458d54a061 --- /dev/null +++ b/doc/sphinx/source/ops/batch_norm_training.rst @@ -0,0 +1,89 @@ +.. batch_norm_training.rst: + +################# +BatchNormTraining +################# + +.. code-block:: cpp + + BatchNormTraining // Compute mean and variance from the input. 
+ + +Description +=========== + + + +Inputs +------ + ++---------------------+-------------------------+------------------------------+ +| Name | Element Type | Shape | ++=====================+=========================+==============================+ +| ``input`` | real | :math:`(\bullet, C, \ldots)` | ++---------------------+-------------------------+------------------------------+ +| ``gamma`` | same as ``input`` | :math:`(C)` | ++---------------------+-------------------------+------------------------------+ +| ``beta`` | same as ``input`` | :math:`(C)` | ++---------------------+-------------------------+------------------------------+ + + +Attributes +---------- + ++------------------+--------------------+--------------------------------------------------------+ +| Name | Type | Notes | ++==================+====================+========================================================+ +| ``epsilon`` | ``double`` | Small bias added to variance to avoid division by 0. | ++------------------+--------------------+--------------------------------------------------------+ + +Outputs +------- + ++---------------------+-------------------------+-----------------------------+ +| Name | Element Type | Shape | ++=====================+=========================+=============================+ +| ``normalized`` | same as ``gamma`` | Same as ``input`` | ++---------------------+-------------------------+-----------------------------+ +| ``batch_mean`` | same as ``gamma`` | :math:`(C)` | ++---------------------+-------------------------+-----------------------------+ +| ``batch_variance`` | same as ``gamma`` | :math:`(C)` | ++---------------------+-------------------------+-----------------------------+ + +The ``batch_mean`` and ``batch_variance`` outputs are computed per-channel from +``input``. + + +Mathematical Definition +======================= + +The axes of the input fall into two categories: positional and channel, with +channel being axis 1. 
For each position, there are :math:`C` channel values, +each normalized independently. + +Normalization of a channel sample is controlled by two values: + +* the `batch_mean` :math:`\mu`, and + +* the `batch_variance` :math:`\sigma^2`; + +and by two scaling attributes: :math:`\gamma` and :math:`\beta`. + +The values for :math:`\mu` and :math:`\sigma^2` come from computing the +mean and variance of ``input``. + +.. math:: + + \mu_c &= \mathop{\mathbb{E}}\left(\mathtt{input}_{\bullet, c, \ldots}\right)\\ + \sigma^2_c &= \mathop{\mathtt{Var}}\left(\mathtt{input}_{\bullet, c, \ldots}\right)\\ + \mathtt{normalized}_{\bullet, c, \ldots} &= \frac{\mathtt{input}_{\bullet, c, \ldots}-\mu_c}{\sqrt{\sigma^2_c+\epsilon}}\gamma_c+\beta_c + + +C++ Interface +============== + +.. doxygenclass:: ngraph::op::BatchNormTraining + :project: ngraph + :members: + + diff --git a/doc/sphinx/source/ops/batch_norm_training_backprop.rst b/doc/sphinx/source/ops/batch_norm_training_backprop.rst new file mode 100644 index 00000000000..68004bbf092 --- /dev/null +++ b/doc/sphinx/source/ops/batch_norm_training_backprop.rst @@ -0,0 +1,71 @@ +.. batch_norm_training_backprop.rst: + +######################### +BatchNormTrainingBackprop +######################### + +.. code-block:: cpp + + BatchNormTrainingBackprop // Compute mean and variance backprop from the input. 
+ + +Description +=========== + + + +Inputs +------ + ++----------------------+-------------------------+------------------------------+ +| Name | Element Type | Shape | ++======================+=========================+==============================+ +| ``input`` | real | :math:`(\bullet, C, \ldots)` | ++----------------------+-------------------------+------------------------------+ +| ``gamma`` | same as ``input`` | :math:`(C)` | ++----------------------+-------------------------+------------------------------+ +| ``beta`` | same as ``input`` | :math:`(C)` | ++----------------------+-------------------------+------------------------------+ +| ``mean`` | same as ``input`` | :math:`(C)` | ++----------------------+-------------------------+------------------------------+ +| ``variance`` | same as ``input`` | :math:`(C)` | ++----------------------+-------------------------+------------------------------+ +| ``normalized_delta`` | same as ``input`` | Same as ``input`` | ++----------------------+-------------------------+------------------------------+ + + +Attributes +---------- + ++------------------+--------------------+--------------------------------------------------------+ +| Name | Type | Notes | ++==================+====================+========================================================+ +| ``epsilon`` | ``double`` | Small bias added to variance to avoid division by 0. 
| ++------------------+--------------------+--------------------------------------------------------+ + +Outputs +------- + ++---------------------+-------------------------+-----------------------------+ +| Name | Element Type | Shape | ++=====================+=========================+=============================+ +| ``input_delta`` | same as ``input`` | Same as ``input`` | ++---------------------+-------------------------+-----------------------------+ +| ``gamma_delta`` | same as ``gamma`` | :math:`(C)` | ++---------------------+-------------------------+-----------------------------+ +| ``beta_delta`` | same as ``beta`` | :math:`(C)` | ++---------------------+-------------------------+-----------------------------+ + + +Mathematical Definition +======================= + + +C++ Interface +============== + +.. doxygenclass:: ngraph::op::BatchNormTrainingBackprop + :project: ngraph + :members: + + diff --git a/doc/sphinx/source/ops/index.rst b/doc/sphinx/source/ops/index.rst index 3b70d1e058f..8f0423dd082 100644 --- a/doc/sphinx/source/ops/index.rst +++ b/doc/sphinx/source/ops/index.rst @@ -56,7 +56,9 @@ Not currently a comprehensive list. * :doc:`atan` * :doc:`avg_pool` * :doc:`avg_pool_backprop` - * :doc:`batch_norm` + * :doc:`batch_norm_inference` + * :doc:`batch_norm_training` + * :doc:`batch_norm_training_backprop` * :doc:`broadcast` * :doc:`ceiling` * :doc:`concat` @@ -123,7 +125,9 @@ Not currently a comprehensive list. 
atan.rst avg_pool.rst avg_pool_backprop.rst - batch_norm.rst + batch_norm_inference.rst + batch_norm_training.rst + batch_norm_training_backprop.rst broadcast.rst ceiling.rst concat.rst diff --git a/src/ngraph/descriptor/input.hpp b/src/ngraph/descriptor/input.hpp index e4ef4508778..74d96cd81ba 100644 --- a/src/ngraph/descriptor/input.hpp +++ b/src/ngraph/descriptor/input.hpp @@ -60,10 +60,10 @@ namespace ngraph void replace_output(Output& output); protected: - /// \return the tensor view for the connected output + /// \return the tensor for the connected output std::shared_ptr get_tensor_ptr() const; - /// \return the tensor view for the connected output + /// \return the tensor for the connected output std::shared_ptr get_tensor_ptr(); public: diff --git a/src/ngraph/descriptor/layout/tensor_layout.hpp b/src/ngraph/descriptor/layout/tensor_layout.hpp index 780a31d501c..5dc38fc5763 100644 --- a/src/ngraph/descriptor/layout/tensor_layout.hpp +++ b/src/ngraph/descriptor/layout/tensor_layout.hpp @@ -32,7 +32,7 @@ namespace ngraph { namespace layout { - /// \brief Interface for describing implementations of tensor views. + /// \brief Interface for describing implementations of tensors. /// /// Kernel selection will need to pay attention to the layout. class TensorLayout @@ -44,7 +44,7 @@ namespace ngraph public: virtual ~TensorLayout() {} - /// Extent of this view in buffer. + /// Extent of this tensor in buffer. /// /// When we support non-linear buffers, this will need to be something other than size_t. size_t get_size() const; diff --git a/src/ngraph/descriptor/output.hpp b/src/ngraph/descriptor/output.hpp index b145fb528bb..f251a0b84ed 100644 --- a/src/ngraph/descriptor/output.hpp +++ b/src/ngraph/descriptor/output.hpp @@ -39,7 +39,7 @@ namespace ngraph public: /// \param node Node that owns this output. 
/// \param index Position of the output tensor in all output tensors - /// \param tensor The view of this tensor; where the value will be written + /// \param tensor The tensor where the value will be written Output(Node* node, size_t index, const std::shared_ptr& tensor); std::shared_ptr get_node() const; diff --git a/src/ngraph/descriptor/tensor.hpp b/src/ngraph/descriptor/tensor.hpp index 3a5ee901e23..1bf57dcb561 100644 --- a/src/ngraph/descriptor/tensor.hpp +++ b/src/ngraph/descriptor/tensor.hpp @@ -35,7 +35,7 @@ namespace ngraph class TensorLayout; } - /// \brief Compile-time descriptor of a first-class value that is a view of a tensor. + /// \brief Compile-time descriptor of a first-class value that is a tensor. class Tensor { Tensor(const Tensor&) = delete; diff --git a/src/ngraph/op/batch_norm.cpp b/src/ngraph/op/batch_norm.cpp index 251c86e418b..4f09e461bef 100644 --- a/src/ngraph/op/batch_norm.cpp +++ b/src/ngraph/op/batch_norm.cpp @@ -22,19 +22,17 @@ #include "ngraph/op/get_output_element.hpp" #include "ngraph/validation_util.hpp" -ngraph::op::BatchNormInference::BatchNormInference(double eps, - std::shared_ptr gamma, - std::shared_ptr beta, - std::shared_ptr input, - std::shared_ptr mean, - std::shared_ptr variance) - : Op("BatchNormInference", check_single_output_args({gamma, beta, input, mean, variance})) - , m_epsilon(eps) +ngraph::op::BatchNormTraining::BatchNormTraining(std::shared_ptr input, + std::shared_ptr gamma, + std::shared_ptr beta, + double epsilon) + : Op("BatchNormTraining", check_single_output_args({gamma, beta, input})) + , m_epsilon(epsilon) { - set_output_size(1); constructor_validate_and_infer_types(); } +// DEPRECATED ngraph::op::BatchNormTraining::BatchNormTraining(double eps, std::shared_ptr gamma, std::shared_ptr beta, @@ -42,50 +40,124 @@ ngraph::op::BatchNormTraining::BatchNormTraining(double eps, : Op("BatchNormTraining", check_single_output_args({gamma, beta, input})) , m_epsilon(eps) { - set_output_size(3); 
constructor_validate_and_infer_types(); } -void ngraph::op::BatchNormInference::validate_and_infer_types() +void ngraph::op::BatchNormTraining::validate_and_infer_types() { element::Type result_et; PartialShape result_batch_shape; - PartialShape result_channel_shape; // unused here + PartialShape result_channel_shape; + set_output_size(3); std::tie(result_et, result_batch_shape, result_channel_shape) = infer_batch_norm_forward(this, get_input_element_type(INPUT_DATA), get_input_element_type(INPUT_GAMMA), get_input_element_type(INPUT_BETA), - get_input_element_type(INPUT_MEAN), - get_input_element_type(INPUT_VARIANCE), get_input_partial_shape(INPUT_DATA), get_input_partial_shape(INPUT_GAMMA), - get_input_partial_shape(INPUT_BETA), - get_input_partial_shape(INPUT_MEAN), - get_input_partial_shape(INPUT_VARIANCE)); + get_input_partial_shape(INPUT_BETA)); set_output_type(0, result_et, result_batch_shape); + set_output_type(1, result_et, result_channel_shape); + set_output_type(2, result_et, result_channel_shape); } -void ngraph::op::BatchNormTraining::validate_and_infer_types() +std::shared_ptr + ngraph::op::BatchNormTraining::copy_with_new_args(const NodeVector& new_args) const +{ + check_new_args_count(this, new_args); + return std::make_shared( + new_args.at(2), new_args.at(0), new_args.at(1), m_epsilon); +} + +void ngraph::op::BatchNormTraining::generate_adjoints(autodiff::Adjoints& adjoints, + const NodeVector& deltas) +{ + auto gamma = get_argument(0); + auto beta = get_argument(1); + auto input = get_argument(2); + std::shared_ptr mean = nullptr; + std::shared_ptr var = nullptr; + + // Extract mean and variance outputs from BatchNormBase + // as these are used by BatchNormTrainingBackprop. 
+ // The users of the outputs (GetOutputElements' Inputs) aren't sorted + // and get_n() is used to sort the inputs in the same order as Batchnorm's outputs + // Next, Mean and Variance (`at(1)` and `at(2)`) are extracted + // Please see `add_output` in `BatchNormBase::BatchNormBase` for more details + + auto goes = op::get_output_elements(shared_from_this()); + mean = goes.at(1); + var = goes.at(2); + if (!mean) + { + throw ngraph_error("GetOutputElement for mean is missing"); + } + + if (!var) + { + throw ngraph_error("GetOutputElement for variance is missing"); + } + + auto bbn = std::make_shared( + input, gamma, beta, mean, var, deltas.at(0), get_eps_value()); + auto dinput = std::make_shared(bbn, 0); + auto dgamma = std::make_shared(bbn, 1); + auto dbeta = std::make_shared(bbn, 2); + + adjoints.add_delta(input, dinput); + adjoints.add_delta(gamma, dgamma); + adjoints.add_delta(beta, dbeta); +} + +ngraph::op::BatchNormInference::BatchNormInference(std::shared_ptr input, + std::shared_ptr gamma, + std::shared_ptr beta, + std::shared_ptr mean, + std::shared_ptr variance, + double epsilon) + : Op("BatchNormInference", check_single_output_args({gamma, beta, input, mean, variance})) + , m_epsilon(epsilon) +{ + constructor_validate_and_infer_types(); +} + +// DEPRECATED +ngraph::op::BatchNormInference::BatchNormInference(double eps, + std::shared_ptr gamma, + std::shared_ptr beta, + std::shared_ptr input, + std::shared_ptr mean, + std::shared_ptr variance) + : Op("BatchNormInference", check_single_output_args({gamma, beta, input, mean, variance})) + , m_epsilon(eps) +{ + constructor_validate_and_infer_types(); +} + +void ngraph::op::BatchNormInference::validate_and_infer_types() { element::Type result_et; PartialShape result_batch_shape; - PartialShape result_channel_shape; + PartialShape result_channel_shape; // unused here + set_output_size(1); std::tie(result_et, result_batch_shape, result_channel_shape) = infer_batch_norm_forward(this, 
get_input_element_type(INPUT_DATA), get_input_element_type(INPUT_GAMMA), get_input_element_type(INPUT_BETA), + get_input_element_type(INPUT_MEAN), + get_input_element_type(INPUT_VARIANCE), get_input_partial_shape(INPUT_DATA), get_input_partial_shape(INPUT_GAMMA), - get_input_partial_shape(INPUT_BETA)); + get_input_partial_shape(INPUT_BETA), + get_input_partial_shape(INPUT_MEAN), + get_input_partial_shape(INPUT_VARIANCE)); set_output_type(0, result_et, result_batch_shape); - set_output_type(1, result_et, result_channel_shape); - set_output_type(2, result_et, result_channel_shape); } std::shared_ptr @@ -93,28 +165,20 @@ std::shared_ptr { check_new_args_count(this, new_args); return std::make_shared( - m_epsilon, new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), new_args.at(4)); -} - -std::shared_ptr - ngraph::op::BatchNormTraining::copy_with_new_args(const NodeVector& new_args) const -{ - check_new_args_count(this, new_args); - return std::make_shared( - m_epsilon, new_args.at(0), new_args.at(1), new_args.at(2)); + new_args.at(2), new_args.at(0), new_args.at(1), new_args.at(3), new_args.at(4), m_epsilon); } ngraph::op::BatchNormTrainingBackprop::BatchNormTrainingBackprop( - double eps, + std::shared_ptr input, std::shared_ptr gamma, std::shared_ptr beta, - std::shared_ptr input, std::shared_ptr mean, std::shared_ptr variance, - std::shared_ptr delta) + std::shared_ptr delta, + double epsilon) : Op("BatchNormTrainingBackprop", check_single_output_args({gamma, beta, input, mean, variance, delta})) - , m_epsilon(eps) + , m_epsilon(epsilon) { set_output_size(3); @@ -167,51 +231,11 @@ std::shared_ptr ngraph::op::BatchNormTrainingBackprop::copy_with_new_args(const NodeVector& new_args) const { check_new_args_count(this, new_args); - return std::make_shared(m_epsilon, + return std::make_shared(new_args.at(2), new_args.at(0), new_args.at(1), - new_args.at(2), new_args.at(3), new_args.at(4), - new_args.at(5)); -} - -void 
ngraph::op::BatchNormTraining::generate_adjoints(autodiff::Adjoints& adjoints, - const NodeVector& deltas) -{ - auto gamma = get_argument(0); - auto beta = get_argument(1); - auto input = get_argument(2); - std::shared_ptr mean = nullptr; - std::shared_ptr var = nullptr; - - // Extract mean and variance outputs from BatchNormBase - // as these are used by BatchNormTrainingBackprop. - // The users of the outputs (GetOutputElements' Inputs) aren't sorted - // and get_n() is used to sort the inputs in the same order as Batchnorm's outputs - // Next, Mean and Variance (`at(1)` and `at(2)`) are extracted - // Please see `add_output` in `BatchNormBase::BatchNormBase` for more details - - auto goes = op::get_output_elements(shared_from_this()); - mean = goes.at(1); - var = goes.at(2); - if (!mean) - { - throw ngraph_error("GetOutputElement for mean is missing"); - } - - if (!var) - { - throw ngraph_error("GetOutputElement for variance is missing"); - } - - auto bbn = std::make_shared( - get_eps_value(), gamma, beta, input, mean, var, deltas.at(0)); - auto dinput = std::make_shared(bbn, 0); - auto dgamma = std::make_shared(bbn, 1); - auto dbeta = std::make_shared(bbn, 2); - - adjoints.add_delta(input, dinput); - adjoints.add_delta(gamma, dgamma); - adjoints.add_delta(beta, dbeta); + new_args.at(5), + m_epsilon); } diff --git a/src/ngraph/op/batch_norm.hpp b/src/ngraph/op/batch_norm.hpp index b2f30a9bd6d..1069ca2d322 100644 --- a/src/ngraph/op/batch_norm.hpp +++ b/src/ngraph/op/batch_norm.hpp @@ -27,9 +27,20 @@ namespace ngraph { namespace op { + // \brief Batchnorm for training operation class BatchNormTraining : public Op { public: + // \param input Must have rank >= 2, [., C, ...] + // \param gamma gamma scaling for normalized value. 
[C] + // \param beta bias added to the scaled normalized value [C] + // \param epsilon Avoids division by 0 if input has 0 variance + BatchNormTraining(std::shared_ptr input, + std::shared_ptr gamma, + std::shared_ptr beta, + double epsilon); + + // DEPRECATED // In this version of BatchNorm: // // MEAN AND VARIANCE: computed directly from the content of 'input'. @@ -49,6 +60,7 @@ namespace ngraph // output[0]: shall have the same shape as 'input'. // output[1]: shall have rank 1, with the same span as input's channel axis. // output[2]: shall have rank 1, with the same span as input's channel axis. + // DEPRECATED BatchNormTraining(double eps, std::shared_ptr gamma, std::shared_ptr beta, @@ -75,6 +87,20 @@ namespace ngraph class BatchNormInference : public Op { public: + // \param input [., C, ...] + // \param gamma gamma scaling for normalized value. [C] + // \param beta bias added to the scaled normalized value [C] + // \param mean value for mean normalization [C] + // \param variance value for variance normalization [C] + // \param epsilon Avoids division by 0 if input has 0 variance + BatchNormInference(std::shared_ptr input, + std::shared_ptr gamma, + std::shared_ptr beta, + std::shared_ptr mean, + std::shared_ptr variance, + double epsilon); + + // DEPRECATED // In this version of BatchNorm: // // MEAN AND VARIANCE: provided by the 'mean' and 'variance' parameters. @@ -92,6 +118,7 @@ namespace ngraph // mean: must have rank 1, with the same span as input's channel axis. // variance: must have rank 1, with the same span as input's channel axis. // output: shall have the same shape as 'input'. 
+ // DEPRECATED BatchNormInference(double eps, std::shared_ptr gamma, std::shared_ptr beta, @@ -125,13 +152,13 @@ namespace ngraph class BatchNormTrainingBackprop : public Op { public: - BatchNormTrainingBackprop(double eps, + BatchNormTrainingBackprop(std::shared_ptr input, std::shared_ptr gamma, std::shared_ptr beta, - std::shared_ptr input, std::shared_ptr mean, std::shared_ptr variance, - std::shared_ptr delta); + std::shared_ptr delta, + double epsilon); void validate_and_infer_types() override; diff --git a/src/ngraph/serializer.cpp b/src/ngraph/serializer.cpp index 4f1d611d15b..3e6e7eaf75b 100644 --- a/src/ngraph/serializer.cpp +++ b/src/ngraph/serializer.cpp @@ -531,21 +531,21 @@ static shared_ptr case OP_TYPEID::BatchNormTraining: { auto epsilon = node_js.at("eps").get(); - node = make_shared(epsilon, args[0], args[1], args[2]); + node = make_shared(args[2], args[0], args[1], epsilon); break; } case OP_TYPEID::BatchNormInference: { auto epsilon = node_js.at("eps").get(); node = make_shared( - epsilon, args[0], args[1], args[2], args[3], args[4]); + args[2], args[0], args[1], args[3], args[4], epsilon); break; } case OP_TYPEID::BatchNormTrainingBackprop: { auto epsilon = node_js.at("eps").get(); node = make_shared( - epsilon, args[0], args[1], args[2], args[3], args[4], args[5]); + args[2], args[0], args[1], args[3], args[4], args[5], epsilon); break; } case OP_TYPEID::Broadcast: diff --git a/test/autodiff.in.cpp b/test/autodiff.in.cpp index 1f86a9aab04..2e66b09eb34 100644 --- a/test/autodiff.in.cpp +++ b/test/autodiff.in.cpp @@ -1644,7 +1644,7 @@ NGRAPH_TEST(${BACKEND_NAME}, backwards_batch_norm_three_outputs) auto B = make_shared(element::f64, shape_mean); auto C = make_shared(element::f64, shape_mean); - auto BN = make_shared(1e-3, B, C, A); + auto BN = make_shared(A, B, C, 1e-3); // make sure we create GOEs for mean and variance needed for bprop goes.push_back(make_shared(BN, 1)); goes.push_back(make_shared(BN, 2)); diff --git a/test/backend_test.in.cpp 
b/test/backend_test.in.cpp index 1a48f1ab80c..1b58addb5bd 100644 --- a/test/backend_test.in.cpp +++ b/test/backend_test.in.cpp @@ -299,7 +299,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batch_norm_one_output) auto Gamma = op::Constant::create(element::f64, shape_mean, {1.75437676, 0.37950502, 1.13727544}); - auto BN = make_shared(1e-3, Gamma, Beta, A, Mean, Variance); + auto BN = make_shared(A, Gamma, Beta, Mean, Variance, 1e-3); auto f = make_shared(BN, op::ParameterVector{A}); auto backend = runtime::Backend::create("${BACKEND_NAME}"); @@ -329,7 +329,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batch_norm_three_outputs) auto Gamma = op::Constant::create(element::f64, shape_mean, {1.75437676, 0.37950502, 1.13727544}); - auto BN = make_shared(1e-3, Gamma, Beta, A); + auto BN = make_shared(A, Gamma, Beta, 1e-3); auto f0 = make_shared(make_shared(BN, 0), op::ParameterVector{A}); @@ -4412,7 +4412,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_b1c2h2w2) auto beta = make_shared(element::f32, beta_shape); double eps = 0.001; auto shape_r = Shape{1, 2, 2, 2}; - auto bn = make_shared(eps, gamma, beta, input); + auto bn = make_shared(input, gamma, beta, eps); auto output_rt = std::make_shared(bn, 0); auto mean_rt = std::make_shared(bn, 1); @@ -4475,7 +4475,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_b2c2h2w1) auto beta = make_shared(element::f32, beta_shape); double eps = 0.001; auto shape_r = Shape{2, 2, 2, 1}; - auto bn = make_shared(eps, gamma, beta, input); + auto bn = make_shared(input, gamma, beta, eps); auto output_rt = std::make_shared(bn, 0); auto mean_rt = std::make_shared(bn, 1); @@ -4532,7 +4532,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_bprop_n4c3h2w2) auto beta = make_shared(element::f32, beta_shape); double eps = 0.001; auto shape_r = Shape{4, 3, 2, 2}; - auto bn = make_shared(eps, gamma, beta, input); + auto bn = make_shared(input, gamma, beta, eps); auto bn_dx = make_shared(bn, 0); auto bn_dgamma = make_shared(bn, 1); auto bn_dbeta = make_shared(bn, 2); @@ -4627,7 
+4627,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_inference_b2c2h2w1) auto beta = make_shared(element::f32, beta_shape); double eps = 0.001; auto shape_r = Shape{2, 2, 2, 1}; - auto bn = make_shared(eps, gamma, beta, input, mean, var); + auto bn = make_shared(input, gamma, beta, mean, var, eps); auto f = make_shared(bn, op::ParameterVector{input, gamma, beta, mean, var}); auto backend = runtime::Backend::create("${BACKEND_NAME}"); @@ -4676,7 +4676,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_globalstats_b2c2w2h1) auto beta = make_shared(element::f32, beta_shape); double eps = 0.001; auto shape_r = Shape{2, 2, 2, 1}; - auto bn = make_shared(eps, gamma, beta, input, mean, var); + auto bn = make_shared(input, gamma, beta, mean, var, eps); auto f = make_shared(bn, op::ParameterVector{gamma, beta, input, mean, var}); auto backend = runtime::Backend::create("${BACKEND_NAME}"); @@ -5428,14 +5428,14 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_bprop) auto g = std::make_shared(element::f32, sca); auto b = std::make_shared(element::f32, sca); auto input = std::make_shared(element::f32, vec); - auto bn_fp = std::make_shared(eps, g, b, input); + auto bn_fp = std::make_shared(input, g, b, eps); auto bnorm = std::make_shared(bn_fp, 0); auto mean = std::make_shared(bn_fp, 1); auto var = std::make_shared(bn_fp, 2); auto delta = std::make_shared(element::f32, vec); auto bn_bp = - std::make_shared(eps, g, b, bnorm, mean, var, delta); + std::make_shared(bnorm, g, b, mean, var, delta, eps); auto dx = std::make_shared(bn_bp, 0); std::vector> args = { @@ -5459,7 +5459,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_bprop_2step) auto g = std::make_shared(element::f32, sca); auto b = std::make_shared(element::f32, sca); auto input = std::make_shared(element::f32, vec); - auto bn_fp = std::make_shared(eps, g, b, input); + auto bn_fp = std::make_shared(input, g, b, eps); auto bnorm = std::make_shared(bn_fp, 0); auto mean = std::make_shared(bn_fp, 1); auto var = 
std::make_shared(bn_fp, 2); @@ -5480,7 +5480,7 @@ NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_bprop_2step) auto m = std::make_shared(element::f32, sca); auto v = std::make_shared(element::f32, sca); auto delta = std::make_shared(element::f32, vec); - auto bn_bp = std::make_shared(eps, g, b, bn_output, m, v, delta); + auto bn_bp = std::make_shared(bn_output, g, b, m, v, delta, eps); auto dx = std::make_shared(bn_bp, 0); args.pop_back(); // remove x diff --git a/test/backend_test.in.cpp-9bfce850 b/test/backend_test.in.cpp-9bfce850 new file mode 100644 index 00000000000..1b58addb5bd --- /dev/null +++ b/test/backend_test.in.cpp-9bfce850 @@ -0,0 +1,5571 @@ +//***************************************************************************** +// Copyright 2017-2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** + +#include +#include +#include +#include +#include +#include +#include +#include "gtest/gtest.h" + +#include "ngraph/autodiff/adjoints.hpp" +#include "ngraph/graph_util.hpp" +#include "ngraph/log.hpp" +#include "ngraph/ngraph.hpp" +#include "ngraph/op/experimental/generate_mask.hpp" +#include "ngraph/serializer.hpp" +#include "ngraph/state/rng_state.hpp" +#include "util/all_close.hpp" +#include "util/all_close_f.hpp" +#include "util/ndarray.hpp" +#include "util/random.hpp" +#include "util/test_control.hpp" +#include "util/test_tools.hpp" + +using namespace std; +using namespace ngraph; + +static string s_manifest = "${MANIFEST}"; + +static const vector s_known_element_types = {element::from(), + element::from(), + element::from(), + element::from(), + element::from(), + element::from(), + element::from(), + element::from(), + element::from(), + element::from()}; + +class UnhandledOp : public ngraph::op::Op +{ +public: + UnhandledOp(const std::shared_ptr& arg) + : Op("Unsupported_op", check_single_output_args({arg})) + { + constructor_validate_and_infer_types(); + } + shared_ptr copy_with_new_args(const NodeVector& new_args) const override + { + return make_shared(new_args[0]); + } + +protected: + void validate_and_infer_types() override + { + set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); + } +}; + +NGRAPH_TEST(${BACKEND_NAME}, unhandled_op) +{ + Shape shape{2, 2}; + auto A = make_shared(element::f32, shape); + auto unhandled = make_shared(A); + auto f = make_shared(unhandled, op::ParameterVector{A}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + shared_ptr a = backend->create_tensor(shape); + shared_ptr result = backend->create_tensor(shape); + ASSERT_THROW(backend->call_with_validate(f, {result}, {a}), unsupported_op); +} + +NGRAPH_TEST(${BACKEND_NAME}, function_name) +{ + Shape shape{2, 2}; + auto A = make_shared(element::f32, shape); 
+ auto B = make_shared(element::f32, shape); + auto f = make_shared(A + B, op::ParameterVector{A, B}, "funky func name"); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + shared_ptr a = backend->create_tensor(shape); + shared_ptr b = backend->create_tensor(shape); + shared_ptr result = backend->create_tensor(shape); + + copy_data(a, test::NDArray({{1, 2}, {3, 4}}).get_vector()); + copy_data(b, test::NDArray({{5, 6}, {7, 8}}).get_vector()); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ(read_vector(result), + (test::NDArray({{6, 8}, {10, 12}})).get_vector()); +} + +NGRAPH_TEST(${BACKEND_NAME}, node_name) +{ + Shape shape{2, 2}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto C = A + B; + C->set_name("a node name"); + auto f = make_shared(C, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + shared_ptr a = backend->create_tensor(element::f32, shape); + shared_ptr b = backend->create_tensor(element::f32, shape); + shared_ptr result = backend->create_tensor(element::f32, shape); + + copy_data(a, test::NDArray({{1, 2}, {3, 4}}).get_vector()); + copy_data(b, test::NDArray({{5, 6}, {7, 8}}).get_vector()); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ(read_vector(result), + (test::NDArray({{6, 8}, {10, 12}})).get_vector()); +} + +NGRAPH_TEST(${BACKEND_NAME}, aliased_output) +{ + Shape shape{2, 2}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto C = A + B; + auto D = A * B; + auto E = op::Constant::create(element::f32, shape, {1, 2, 3, 4}); + auto f = make_shared(NodeVector{C, C, D, D, C, E, E}, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + shared_ptr a = backend->create_tensor(element::f32, shape); + 
shared_ptr b = backend->create_tensor(element::f32, shape); + shared_ptr out1 = backend->create_tensor(element::f32, shape); + shared_ptr out2 = backend->create_tensor(element::f32, shape); + shared_ptr out3 = backend->create_tensor(element::f32, shape); + shared_ptr out4 = backend->create_tensor(element::f32, shape); + shared_ptr out5 = backend->create_tensor(element::f32, shape); + shared_ptr out6 = backend->create_tensor(element::f32, shape); + shared_ptr out7 = backend->create_tensor(element::f32, shape); + + copy_data(a, vector{0, 1, 2, 3}); + copy_data(b, vector{1, 2, 3, 4}); + vector expectedC{1, 3, 5, 7}; + vector expectedD{0, 2, 6, 12}; + vector expectedE{1, 2, 3, 4}; + + backend->call_with_validate(f, {out1, out2, out3, out4, out5, out6, out7}, {a, b}); + EXPECT_EQ(expectedC, read_vector(out1)); + EXPECT_EQ(expectedC, read_vector(out2)); + EXPECT_EQ(expectedD, read_vector(out3)); + EXPECT_EQ(expectedD, read_vector(out4)); + EXPECT_EQ(expectedC, read_vector(out5)); + EXPECT_EQ(expectedE, read_vector(out6)); + EXPECT_EQ(expectedE, read_vector(out7)); +} + +NGRAPH_TEST(${BACKEND_NAME}, parameter_as_output) +{ + Shape shape{3, 4}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(A, op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + shared_ptr a = backend->create_tensor(element::f32, shape); + shared_ptr result = backend->create_tensor(element::f32, shape); + + vector expected{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + vector zero(shape_size(shape), 0); + copy_data(a, expected); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ(read_vector(result), expected); +} + +NGRAPH_TEST(${BACKEND_NAME}, abc) +{ + Shape shape{2, 2}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto C = make_shared(element::f32, shape); + auto f = make_shared((A + B) * C, op::ParameterVector{A, B, C}); + + auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + shared_ptr a = backend->create_tensor(element::f32, shape); + shared_ptr b = backend->create_tensor(element::f32, shape); + shared_ptr c = backend->create_tensor(element::f32, shape); + shared_ptr result = backend->create_tensor(element::f32, shape); + + copy_data(a, test::NDArray({{1, 2}, {3, 4}}).get_vector()); + copy_data(b, test::NDArray({{5, 6}, {7, 8}}).get_vector()); + copy_data(c, test::NDArray({{9, 10}, {11, 12}}).get_vector()); + + backend->call_with_validate(f, {result}, {a, b, c}); + EXPECT_EQ(read_vector(result), + (test::NDArray({{54, 80}, {110, 144}})).get_vector()); + + backend->call_with_validate(f, {result}, {b, a, c}); + EXPECT_EQ(read_vector(result), + (test::NDArray({{54, 80}, {110, 144}})).get_vector()); + + backend->call_with_validate(f, {result}, {a, c, b}); + EXPECT_EQ(read_vector(result), + (test::NDArray({{50, 72}, {98, 128}})).get_vector()); +} + +NGRAPH_TEST(${BACKEND_NAME}, abc_int64) +{ + Shape shape{2, 2}; + auto A = make_shared(element::i64, shape); + auto B = make_shared(element::i64, shape); + auto C = make_shared(element::i64, shape); + auto f = make_shared((A + B) * C, op::ParameterVector{A, B, C}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::i64, shape); + copy_data(a, vector{1, 2, 3, 4}); + auto b = backend->create_tensor(element::i64, shape); + copy_data(b, vector{5, 6, 7, 8}); + auto c = backend->create_tensor(element::i64, shape); + copy_data(c, vector{9, 10, 11, 12}); + auto result = backend->create_tensor(element::i64, shape); + + backend->call_with_validate(f, {result}, {a, b, c}); + EXPECT_EQ((vector{54, 80, 110, 144}), read_vector(result)); + + backend->call_with_validate(f, {result}, {b, a, c}); + EXPECT_EQ((vector{54, 80, 110, 144}), read_vector(result)); + + backend->call_with_validate(f, {result}, {a, c, b}); + 
EXPECT_EQ((vector{50, 72, 98, 128}), read_vector(result)); +} + +// Multiple retrive values +NGRAPH_TEST(${BACKEND_NAME}, multiple_result) +{ + Shape shape{2, 2}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto C = make_shared(element::f32, shape); + auto A_add_B = make_shared(A, B); + auto A_add_B_mul_C = make_shared(A_add_B, C); + + auto f = + make_shared(NodeVector{A_add_B, A_add_B_mul_C}, op::ParameterVector{A, B, C}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 2, 3, 4}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{5, 6, 7, 8}); + auto c = backend->create_tensor(element::f32, shape); + copy_data(c, vector{9, 10, 11, 12}); + + auto r0 = backend->create_tensor(element::f32, shape); + auto r1 = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {r0, r1}, {a, b, c}); + + EXPECT_EQ((vector{6, 8, 10, 12}), read_vector(r0)); + EXPECT_EQ((vector{54, 80, 110, 144}), read_vector(r1)); +} + +NGRAPH_TEST(${BACKEND_NAME}, batch_norm_one_output) +{ + auto shape_in = Shape{2, 3}; + auto shape_mean = Shape{3}; + + auto A = make_shared(element::f64, shape_in); + auto Mean = + op::Constant::create(element::f64, shape_mean, {0.00396654, -1.25294404, 1.16651872}); + auto Variance = + op::Constant::create(element::f64, shape_mean, {2.40871689, 1.44969511, 0.23469392}); + auto Beta = + op::Constant::create(element::f64, shape_mean, {2.14211921, -0.75733924, 0.42210531}); + auto Gamma = + op::Constant::create(element::f64, shape_mean, {1.75437676, 0.37950502, 1.13727544}); + + auto BN = make_shared(A, Gamma, Beta, Mean, Variance, 1e-3); + auto f = make_shared(BN, op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f64, shape_in); + copy_data( + a, + 
vector{-1.97431703, -2.06521307, 0.54122217, 2.53375939, -0.22342691, 0.45340773}); + + auto result = backend->create_tensor(element::f64, shape_in); + vector expected_result{ + -0.09365749, -1.01327395, -1.04269195, 5.00118923, -0.43295258, -1.24840283}; + + backend->call_with_validate(f, {result}, {a}); + EXPECT_TRUE(test::all_close(vector{expected_result}, read_vector(result))); +} + +NGRAPH_TEST(${BACKEND_NAME}, batch_norm_three_outputs) +{ + auto shape_in = Shape{2, 3}; + auto shape_mean = Shape{3}; + + auto A = make_shared(element::f64, shape_in); + auto Beta = + op::Constant::create(element::f64, shape_mean, {2.14211921, -0.75733924, 0.42210531}); + auto Gamma = + op::Constant::create(element::f64, shape_mean, {1.75437676, 0.37950502, 1.13727544}); + + auto BN = make_shared(A, Gamma, Beta, 1e-3); + + auto f0 = + make_shared(make_shared(BN, 0), op::ParameterVector{A}); + auto f1 = + make_shared(make_shared(BN, 1), op::ParameterVector{A}); + auto f2 = + make_shared(make_shared(BN, 2), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f64, shape_in); + copy_data( + a, + vector{-1.97431703, -2.06521307, 0.54122217, 2.53375939, -0.22342691, 0.45340773}); + + auto result0 = backend->create_tensor(element::f64, shape_in); + vector expected_result0{ + 0.3879149, -1.13662076, 1.34494817, 3.89632344, -0.37805778, -0.50073695}; + + backend->call_with_validate(f0, {result0}, {a}); + EXPECT_TRUE(test::all_close(vector{expected_result0}, read_vector(result0))); + + auto result1 = backend->create_tensor(element::f64, shape_mean); + vector expected_result1{0.27972114, -1.14431989, 0.49731493}; + + backend->call_with_validate(f1, {result1}, {a}); + EXPECT_TRUE(test::all_close(vector{expected_result1}, read_vector(result1))); + + auto result2 = backend->create_tensor(element::f64, shape_mean); + vector expected_result2{5.08068895e+00, 8.48043919e-01, 
1.92784308e-03}; + + backend->call_with_validate(f2, {result2}, {a}); + EXPECT_TRUE(test::all_close(vector{expected_result2}, read_vector(result2))); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_matrix_colwise) +{ + Shape shape_a{2, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{2, 3}; + auto B = make_shared(element::f32, shape_b); + Shape shape_c{2, 3}; + auto C = make_shared(element::f32, shape_c); + Shape shape_r{2, 8}; + auto f = make_shared(make_shared(NodeVector{A, B, C}, 1), + op::ParameterVector{A, B, C}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{2, 4, 8, 16}); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{1, 2, 4, 8, 16, 32}); + auto c = backend->create_tensor(element::f32, shape_c); + copy_data(c, vector{2, 3, 5, 7, 11, 13}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b, c}); + EXPECT_EQ((vector{2, 4, 1, 2, 4, 2, 3, 5, 8, 16, 8, 16, 32, 7, 11, 13}), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_matrix_rowwise) +{ + Shape shape_a{2, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{3, 2}; + auto B = make_shared(element::f32, shape_b); + Shape shape_c{3, 2}; + auto C = make_shared(element::f32, shape_c); + Shape shape_r{8, 2}; + auto f = make_shared(make_shared(NodeVector{A, B, C}, 0), + op::ParameterVector{A, B, C}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{2, 4, 8, 16}); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{1, 2, 4, 8, 16, 32}); + auto c = backend->create_tensor(element::f32, shape_c); + copy_data(c, vector{2, 3, 5, 7, 11, 13}); + auto result = 
backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b, c}); + EXPECT_EQ((vector{2, 4, 8, 16, 1, 2, 4, 8, 16, 32, 2, 3, 5, 7, 11, 13}), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_matrix_int64) +{ + Shape shape_a{2, 2}; + auto A = make_shared(element::i64, shape_a); + Shape shape_b{3, 2}; + auto B = make_shared(element::i64, shape_b); + Shape shape_c{3, 2}; + auto C = make_shared(element::i64, shape_c); + Shape shape_r{8, 2}; + auto f = make_shared(make_shared(NodeVector{A, B, C}, 0), + op::ParameterVector{A, B, C}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::i64, shape_a); + copy_data(a, vector{2, 4, 8, 16}); + auto b = backend->create_tensor(element::i64, shape_b); + copy_data(b, vector{1, 2, 4, 8, 16, 32}); + auto c = backend->create_tensor(element::i64, shape_c); + copy_data(c, vector{2, 3, 5, 7, 11, 13}); + auto result = backend->create_tensor(element::i64, shape_r); + + backend->call_with_validate(f, {result}, {a, b, c}); + EXPECT_EQ((vector{2, 4, 8, 16, 1, 2, 4, 8, 16, 32, 2, 3, 5, 7, 11, 13}), + read_vector(result)); +} + +// Params to drive concat_vector_large testing variations +class concat_vector_params : public ::testing::TestWithParam +{ +protected: + concat_vector_params() { num_inputs = GetParam(); } + uint32_t num_inputs; +}; + +NGRAPH_TEST_P(${BACKEND_NAME}, concat_vector_params, concat_vector_large) +{ + Shape shape_a{1}; + NodeVector inputs; + op::ParameterVector inputs_param; + for (uint32_t i = 0; i < num_inputs; i++) + { + auto A = make_shared(element::f32, shape_a); + inputs_param.push_back(A); + inputs.push_back(A); + } + Shape shape_r{num_inputs}; + auto f = make_shared(make_shared(inputs, 0), inputs_param); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + std::vector> inputs_value; + std::vector ref_result; + 
for (uint32_t i = 0; i < num_inputs; i++) + { + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{static_cast(i)}); + ref_result.push_back(static_cast(i)); + inputs_value.push_back(a); + } + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, inputs_value); + EXPECT_EQ(ref_result, read_vector(result)); +} + +// concat_vector_large case generation +// Add thhosw tests to cover paramter space overflow: +// cuda kernel parameter space have limit, if there is large number of parameters, +// there will be overflow for parameter space. +NGRAPH_INSTANTIATE_TEST_CASE_P(${BACKEND_NAME}, + input_sizes, + concat_vector_params, + testing::Values(100, 128, 999)); + +NGRAPH_TEST(${BACKEND_NAME}, concat_vector) +{ + Shape shape_a{4}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{6}; + auto B = make_shared(element::f32, shape_b); + Shape shape_c{2}; + auto C = make_shared(element::f32, shape_c); + Shape shape_r{12}; + auto f = make_shared(make_shared(NodeVector{A, B, C}, 0), + op::ParameterVector{A, B, C}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{2, 4, 8, 16}); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{1, 2, 4, 8, 16, 32}); + auto c = backend->create_tensor(element::f32, shape_c); + copy_data(c, vector{18, 19}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b, c}); + EXPECT_EQ((vector{2, 4, 8, 16, 1, 2, 4, 8, 16, 32, 18, 19}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_4d_tensor) +{ + Shape shape{1, 1, 1, 1}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto C = make_shared(element::f32, shape); + Shape shape_r{3, 1, 1, 1}; + auto f = 
make_shared(make_shared(NodeVector{A, B, C}, 0), + op::ParameterVector{A, B, C}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{2}); + auto c = backend->create_tensor(element::f32, shape); + copy_data(c, vector{3}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b, c}); + EXPECT_EQ((vector{1, 2, 3}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_2d_tensor) +{ + Shape shape{1, 1}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto C = make_shared(element::f32, shape); + Shape shape_r{3, 1}; + auto f = make_shared(make_shared(NodeVector{A, B, C}, 0), + op::ParameterVector{A, B, C}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{2}); + auto c = backend->create_tensor(element::f32, shape); + copy_data(c, vector{3}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b, c}); + EXPECT_EQ((vector{1, 2, 3}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_2d_tensor) +{ + Shape shape{1, 1}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto add1 = make_shared(A, B); + auto C = make_shared(element::f32, shape); + auto D = make_shared(element::f32, shape); + auto add2 = make_shared(C, D); + auto subtract = make_shared(C, A); + Shape shape_r{3, 1}; + auto f = make_shared(make_shared(NodeVector{add1, add2, subtract}, 0), + op::ParameterVector{A, B, C, D}); + + auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{2}); + auto c = backend->create_tensor(element::f32, shape); + copy_data(c, vector{3}); + auto d = backend->create_tensor(element::f32, shape); + copy_data(d, vector{4}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b, c, d}); + EXPECT_EQ((vector{3, 7, 2}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_propagate_2d_tensor) +{ + Shape shape{1, 1}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto add1 = make_shared(A, B); + auto C = make_shared(element::f32, shape); + auto D = make_shared(element::f32, shape); + auto add2 = make_shared(C, D); + auto concat1 = make_shared(NodeVector{add1, add2}, 0); + auto subtract = make_shared(C, A); + Shape shape_r{3, 1}; + auto f = make_shared(make_shared(NodeVector{concat1, subtract}, 0), + op::ParameterVector{A, B, C, D}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{2}); + auto c = backend->create_tensor(element::f32, shape); + copy_data(c, vector{3}); + auto d = backend->create_tensor(element::f32, shape); + copy_data(d, vector{4}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b, c, d}); + EXPECT_EQ((vector{3, 7, 2}), read_vector(result)); +} + +// from numpy import * +// a=linspace(1,2*3*4*3*2,2*3*4*3*2) +// b=linspace(1000+1,1000+2*3*3*3*2,2*3*3*3*2) +// c=linspace(2000+1,2000+2*3*2*3*2,2*3*2*3*2) +// a.shape=(2,3,4,3,2) +// b.shape=(2,3,3,3,2) +// 
c.shape=(2,3,2,3,2) +// z=concatenate((a,b,c),axis=2) +// z.shape=(2*3*(4+3+2)*3*2) +// set_printoptions(suppress=True) +// print(z) +// +// [ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. +// 11. 12. 13. 14. 15. 16. 17. 18. 19. 20. +// 21. 22. 23. 24. 1001. 1002. 1003. 1004. 1005. 1006. +// 1007. 1008. 1009. 1010. 1011. 1012. 1013. 1014. 1015. 1016. +// 1017. 1018. 2001. 2002. 2003. 2004. 2005. 2006. 2007. 2008. +// 2009. 2010. 2011. 2012. 25. 26. 27. 28. 29. 30. +// 31. 32. 33. 34. 35. 36. 37. 38. 39. 40. +// 41. 42. 43. 44. 45. 46. 47. 48. 1019. 1020. +// 1021. 1022. 1023. 1024. 1025. 1026. 1027. 1028. 1029. 1030. +// 1031. 1032. 1033. 1034. 1035. 1036. 2013. 2014. 2015. 2016. +// 2017. 2018. 2019. 2020. 2021. 2022. 2023. 2024. 49. 50. +// 51. 52. 53. 54. 55. 56. 57. 58. 59. 60. +// 61. 62. 63. 64. 65. 66. 67. 68. 69. 70. +// 71. 72. 1037. 1038. 1039. 1040. 1041. 1042. 1043. 1044. +// 1045. 1046. 1047. 1048. 1049. 1050. 1051. 1052. 1053. 1054. +// 2025. 2026. 2027. 2028. 2029. 2030. 2031. 2032. 2033. 2034. +// 2035. 2036. 73. 74. 75. 76. 77. 78. 79. 80. +// 81. 82. 83. 84. 85. 86. 87. 88. 89. 90. +// 91. 92. 93. 94. 95. 96. 1055. 1056. 1057. 1058. +// 1059. 1060. 1061. 1062. 1063. 1064. 1065. 1066. 1067. 1068. +// 1069. 1070. 1071. 1072. 2037. 2038. 2039. 2040. 2041. 2042. +// 2043. 2044. 2045. 2046. 2047. 2048. 97. 98. 99. 100. +// 101. 102. 103. 104. 105. 106. 107. 108. 109. 110. +// 111. 112. 113. 114. 115. 116. 117. 118. 119. 120. +// 1073. 1074. 1075. 1076. 1077. 1078. 1079. 1080. 1081. 1082. +// 1083. 1084. 1085. 1086. 1087. 1088. 1089. 1090. 2049. 2050. +// 2051. 2052. 2053. 2054. 2055. 2056. 2057. 2058. 2059. 2060. +// 121. 122. 123. 124. 125. 126. 127. 128. 129. 130. +// 131. 132. 133. 134. 135. 136. 137. 138. 139. 140. +// 141. 142. 143. 144. 1091. 1092. 1093. 1094. 1095. 1096. +// 1097. 1098. 1099. 1100. 1101. 1102. 1103. 1104. 1105. 1106. +// 1107. 1108. 2061. 2062. 2063. 2064. 2065. 2066. 2067. 2068. +// 2069. 2070. 2071. 2072.] 
+NGRAPH_TEST(${BACKEND_NAME}, concat_5d) +{ + vector a_data(2 * 3 * 4 * 3 * 2); + for (int i = 0; i < 2 * 3 * 4 * 3 * 2; i++) + { + a_data[i] = float(i + 1); + } + + vector b_data(2 * 3 * 3 * 3 * 2); + for (int i = 0; i < 2 * 3 * 3 * 3 * 2; i++) + { + b_data[i] = 1000 + float(i + 1); + } + + vector c_data(2 * 3 * 2 * 3 * 2); + for (int i = 0; i < 2 * 3 * 2 * 3 * 2; i++) + { + c_data[i] = 2000 + float(i + 1); + } + + Shape shape_a{2, 3, 4, 3, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{2, 3, 3, 3, 2}; + auto B = make_shared(element::f32, shape_b); + Shape shape_c{2, 3, 2, 3, 2}; + auto C = make_shared(element::f32, shape_c); + Shape shape_r{2, 3, 9, 3, 2}; + + auto r = make_shared(NodeVector{A, B, C}, 2); + auto f = make_shared(r, op::ParameterVector{A, B, C}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, a_data); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, b_data); + auto c = backend->create_tensor(element::f32, shape_c); + copy_data(c, c_data); + + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b, c}); + EXPECT_EQ( + (vector{ + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., + 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., + 1001., 1002., 1003., 1004., 1005., 1006., 1007., 1008., 1009., 1010., 1011., 1012., + 1013., 1014., 1015., 1016., 1017., 1018., 2001., 2002., 2003., 2004., 2005., 2006., + 2007., 2008., 2009., 2010., 2011., 2012., 25., 26., 27., 28., 29., 30., + 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., 42., + 43., 44., 45., 46., 47., 48., 1019., 1020., 1021., 1022., 1023., 1024., + 1025., 1026., 1027., 1028., 1029., 1030., 1031., 1032., 1033., 1034., 1035., 1036., + 2013., 2014., 2015., 2016., 2017., 2018., 2019., 2020., 2021., 2022., 2023., 2024., + 49., 50., 51., 52., 53., 54., 55., 
56., 57., 58., 59., 60., + 61., 62., 63., 64., 65., 66., 67., 68., 69., 70., 71., 72., + 1037., 1038., 1039., 1040., 1041., 1042., 1043., 1044., 1045., 1046., 1047., 1048., + 1049., 1050., 1051., 1052., 1053., 1054., 2025., 2026., 2027., 2028., 2029., 2030., + 2031., 2032., 2033., 2034., 2035., 2036., 73., 74., 75., 76., 77., 78., + 79., 80., 81., 82., 83., 84., 85., 86., 87., 88., 89., 90., + 91., 92., 93., 94., 95., 96., 1055., 1056., 1057., 1058., 1059., 1060., + 1061., 1062., 1063., 1064., 1065., 1066., 1067., 1068., 1069., 1070., 1071., 1072., + 2037., 2038., 2039., 2040., 2041., 2042., 2043., 2044., 2045., 2046., 2047., 2048., + 97., 98., 99., 100., 101., 102., 103., 104., 105., 106., 107., 108., + 109., 110., 111., 112., 113., 114., 115., 116., 117., 118., 119., 120., + 1073., 1074., 1075., 1076., 1077., 1078., 1079., 1080., 1081., 1082., 1083., 1084., + 1085., 1086., 1087., 1088., 1089., 1090., 2049., 2050., 2051., 2052., 2053., 2054., + 2055., 2056., 2057., 2058., 2059., 2060., 121., 122., 123., 124., 125., 126., + 127., 128., 129., 130., 131., 132., 133., 134., 135., 136., 137., 138., + 139., 140., 141., 142., 143., 144., 1091., 1092., 1093., 1094., 1095., 1096., + 1097., 1098., 1099., 1100., 1101., 1102., 1103., 1104., 1105., 1106., 1107., 1108., + 2061., 2062., 2063., 2064., 2065., 2066., 2067., 2068., 2069., 2070., 2071., 2072.}), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_zero_length_1d_last) +{ + Shape shape_a{4}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{0}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{4}; + + auto r = make_shared(NodeVector{A, B}, 0); + auto f = make_shared(r, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + vector a_data{1, 2, 3, 4}; + vector b_data(0); + + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, a_data); + auto b = backend->create_tensor(element::f32, 
shape_b); + copy_data(b, b_data); + + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_zero_length_1d_middle) +{ + Shape shape_a{4}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{0}; + auto B = make_shared(element::f32, shape_b); + Shape shape_c{4}; + auto C = make_shared(element::f32, shape_c); + Shape shape_r{8}; + + auto r = make_shared(NodeVector{A, B, C}, 0); + auto f = make_shared(r, op::ParameterVector{A, B, C}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + vector a_data{1, 2, 3, 4}; + vector b_data(0); + vector c_data{5, 6, 7, 8}; + + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, a_data); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, b_data); + auto c = backend->create_tensor(element::f32, shape_c); + copy_data(c, c_data); + + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b, c}); + EXPECT_EQ((vector{1, 2, 3, 4, 5, 6, 7, 8}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, concat_zero_length_4d_middle) +{ + Shape shape_a{2, 2, 1, 1}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{2, 2, 0, 1}; + auto B = make_shared(element::f32, shape_b); + Shape shape_c{2, 2, 1, 1}; + auto C = make_shared(element::f32, shape_c); + Shape shape_r{2, 2, 2, 1}; + + auto r = make_shared(NodeVector{A, B, C}, 2); + auto f = make_shared(r, op::ParameterVector{A, B, C}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + vector a_data{1, 2, 3, 4}; + vector b_data(0); + vector c_data{5, 6, 7, 8}; + + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, a_data); + auto b = backend->create_tensor(element::f32, shape_b); + 
copy_data(b, b_data); + auto c = backend->create_tensor(element::f32, shape_c); + copy_data(c, c_data); + + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b, c}); + EXPECT_EQ((vector{1, 5, 2, 6, 3, 7, 4, 8}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, lrn) +{ + Shape shape{2, 3, 2, 1}; + auto A = make_shared(element::f32, shape); + auto lrn = make_shared(A, 1., 2., 1., 3); + auto f = make_shared(lrn, op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + vector args{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, args); + + auto result = backend->create_tensor(element::f32, shape); + backend->call_with_validate(f, {result}, {a}); + + vector expected{0.f, + 0.05325444f, + 0.03402646f, + 0.01869806f, + 0.06805293f, + 0.03287071f, + 0.00509002f, + 0.00356153f, + 0.00174719f, + 0.0012555f, + 0.00322708f, + 0.00235574f}; + EXPECT_TRUE(test::all_close_f(expected, read_vector(result))); +} + +NGRAPH_TEST(${BACKEND_NAME}, select) +{ + Shape shape{2, 2, 2}; + auto A = make_shared(element::boolean, shape); + auto B = make_shared(element::f32, shape); + auto C = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, B, C), op::ParameterVector{A, B, C}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::boolean, shape); + copy_data(a, vector{0, 1, 1, 0, 0, 1, 0, 1}); + auto b = backend->create_tensor(element::f32, shape); + copy_data(b, vector{1, 2, 3, 4, 5, 6, 7, 8}); + auto c = backend->create_tensor(element::f32, shape); + copy_data(c, vector{11, 12, 13, 14, 15, 16, 17, 18}); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a, b, c}); + EXPECT_EQ((vector{11, 2, 3, 14, 15, 6, 17, 8}), 
read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, tensor_constant) +{ + Shape shape{2, 2, 2}; + auto A = op::Constant::create(element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8}); + auto f = make_shared(A, op::ParameterVector{}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {}); + EXPECT_EQ((vector{1, 2, 3, 4, 5, 6, 7, 8}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, tensor_2constant) +{ + Shape shape{2, 2, 2}; + auto A = op::Constant::create(element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8}); + auto f = make_shared(NodeVector{A, A}, op::ParameterVector{}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto result0 = backend->create_tensor(element::f32, shape); + auto result1 = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result0, result1}, {}); + EXPECT_EQ((vector{1, 2, 3, 4, 5, 6, 7, 8}), read_vector(result0)); + EXPECT_EQ((vector{1, 2, 3, 4, 5, 6, 7, 8}), read_vector(result1)); +} + +NGRAPH_TEST(${BACKEND_NAME}, tensor_constant_with_op) +{ + Shape shape{2, 2, 2}; + auto A = op::Constant::create(element::f32, shape, {-1, 2, 3, -4, 5, -6, -7, 8}); + auto f = make_shared(make_shared(A), op::ParameterVector{}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {}); + EXPECT_EQ((vector{1, 2, 3, 4, 5, 6, 7, 8}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, constant_multi_use) +{ + auto A = make_shared(element::i32, Shape{}, std::vector{"388"}); + auto f = make_shared(A, op::ParameterVector{}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + std::shared_ptr r1 = backend->create_tensor(element::i32, 
Shape{}); + backend->call_with_validate(f, {r1}, std::vector>{}); + EXPECT_EQ(read_vector(r1), std::vector{388}); + + std::shared_ptr r2 = backend->create_tensor(element::i32, Shape{}); + backend->call_with_validate(f, {r2}, std::vector>{}); + EXPECT_EQ(read_vector(r2), std::vector{388}); +} + +NGRAPH_TEST(${BACKEND_NAME}, function_call) +{ + // First create "f(A,B,C) = (A+B)*C". + Shape shape{2, 2}; + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto C = make_shared(element::f32, shape); + auto f = make_shared((A + B) * C, op::ParameterVector{A, B, C}); + + // Now make "g(X,Y,Z) = f(X,Y,Z) + f(X,Y,Z)" + auto X = make_shared(element::f32, shape); + auto Y = make_shared(element::f32, shape); + auto Z = make_shared(element::f32, shape); + auto g = + make_shared(make_shared(f, NodeVector{X + Y, Y + Z, Z + X}) + + make_shared(f, NodeVector{X, Y, Z}), + op::ParameterVector{X, Y, Z}); + + // Now call g on some test vectors. + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto x = backend->create_tensor(element::f32, shape); + copy_data(x, vector{1, 2, 3, 4}); + auto y = backend->create_tensor(element::f32, shape); + copy_data(y, vector{5, 6, 7, 8}); + auto z = backend->create_tensor(element::f32, shape); + copy_data(z, vector{9, 10, 11, 12}); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(g, {result}, {x, y, z}); + EXPECT_EQ((vector{254, 368, 502, 656}), read_vector(result)); + + backend->call_with_validate(g, {result}, {y, x, z}); + EXPECT_EQ((vector{278, 400, 542, 704}), read_vector(result)); + + backend->call_with_validate(g, {result}, {x, z, y}); + EXPECT_EQ((vector{194, 296, 418, 560}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, convert_int32_float32) +{ + Shape shape{2, 2}; + auto A = make_shared(element::i32, shape); + auto f = + make_shared(make_shared(A, element::f32), op::ParameterVector{A}); + + auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::i32, shape); + copy_data(a, vector{1, 2, 3, 4}); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, convert_uint16_float32) +{ + Shape shape{2, 2}; + auto A = make_shared(element::u16, shape); + auto f = + make_shared(make_shared(A, element::f32), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::u16, shape); + copy_data(a, vector{1, 2, 3, 4}); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, convert_int32_bool) +{ + Shape shape{2, 2}; + auto A = make_shared(element::i32, shape); + auto f = make_shared(make_shared(A, element::boolean), + op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::i32, shape); + copy_data(a, vector{1, 2, 3, 4}); + auto result = backend->create_tensor(element::boolean, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, convert_float32_bool) +{ + Shape shape{2, 2}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, element::boolean), + op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 2, 3, 4}); + auto result = backend->create_tensor(element::boolean, shape); + + 
backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, slice_scalar) +{ + Shape shape_a{}; + auto A = make_shared(element::f32, shape_a); + Shape shape_r{}; + auto r = make_shared(A, Coordinate{}, Coordinate{}); + auto f = make_shared(r, op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{312}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{312}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, slice_matrix) +{ + Shape shape_a{4, 4}; + auto A = make_shared(element::f32, shape_a); + Shape shape_r{3, 2}; + auto r = make_shared(A, Coordinate{0, 1}, Coordinate{3, 3}); + auto f = make_shared(r, op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{2, 3, 6, 7, 10, 11}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, slice_vector) +{ + Shape shape_a{16}; + auto A = make_shared(element::f32, shape_a); + Shape shape_r{12}; + auto r = make_shared(A, Coordinate{2}, Coordinate{14}); + auto f = make_shared(r, op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a}); + 
EXPECT_EQ((vector{2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, slice_matrix_axis_0_overlap) +{ + Shape shape_a{4, 4}; + auto A = make_shared(element::f32, shape_a); + auto B = make_shared(element::f32, shape_a); + auto C = make_shared(A, B); + Shape shape_r{2, 4}; + auto D = make_shared(C, Coordinate{0, 0}, Coordinate{2, 4}); + auto E = make_shared(C, Coordinate{1, 0}, Coordinate{3, 4}); + auto r = make_shared(D, E); + auto f = make_shared(r, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); + auto b = backend->create_tensor(element::f32, shape_a); + copy_data(b, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((vector{12, 16, 20, 24, 28, 32, 36, 40}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, slice_matrix_strided) +{ + Shape shape_a{4, 4}; + auto A = make_shared(element::f32, shape_a); + Shape shape_r{2, 2}; + auto r = make_shared(A, Coordinate{1, 0}, Coordinate{4, 4}, Strides{2, 3}); + auto f = make_shared(r, op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{4, 7, 12, 15}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, slice_3d) +{ + Shape shape_a{4, 4, 4}; + auto A = make_shared(element::f32, shape_a); + Shape shape_r{2, 2, 2}; + auto r = make_shared(A, Coordinate{1, 1, 1}, 
Coordinate{3, 3, 3}); + auto f = make_shared(r, op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{21, 22, 25, 26, 37, 38, 41, 42}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, slice_3d_strided) +{ + Shape shape_a{4, 4, 4}; + auto A = make_shared(element::f32, shape_a); + Shape shape_r{2, 2, 2}; + auto r = make_shared(A, Coordinate{0, 0, 0}, Coordinate{4, 4, 4}, Strides{2, 2, 2}); + auto f = make_shared(r, op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{0, 2, 8, 10, 32, 34, 40, 42}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, slice_3d_strided_different_strides) +{ + Shape shape_a{4, 4, 4}; + auto A = make_shared(element::f32, shape_a); + Shape shape_r{2, 2, 2}; + auto r = make_shared(A, Coordinate{0, 0, 0}, Coordinate{4, 4, 4}, Strides{2, 2, 3}); + auto f = make_shared(r, op::ParameterVector{A}); + + auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{0, 3, 8, 11, 32, 35, 40, 43}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, scalar_constant_float32) +{ + auto r = op::Constant::create(element::f32, Shape{}, {4.75}); + auto f = make_shared(r, op::ParameterVector{}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto result = backend->create_tensor(element::f32, Shape{}); + + backend->call_with_validate(f, {result}, {}); + EXPECT_EQ(vector{4.75f}, read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, scalar_constant_int64) +{ + auto r = op::Constant::create(element::i64, Shape{}, {2112}); + auto f = make_shared(r, op::ParameterVector{}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto result = backend->create_tensor(element::i64, Shape{}); + + backend->call_with_validate(f, {result}, {}); + EXPECT_EQ(vector{2112}, read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, tensor_constant_float32) +{ + Shape shape{2, 2}; + auto r = op::Constant::create(element::f32, shape, {4.75, 4.5, -5.25, 0.0}); + auto f = make_shared(r, op::ParameterVector{}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {}); + EXPECT_EQ((vector{4.75f, 4.5f, -5.25f, 0.0f}), 
read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, tensor_constant_int64) +{ + Shape shape{2, 2}; + auto r = op::Constant::create(element::i64, shape, {2112, 1848, 1776, 1964}); + auto f = make_shared(r, op::ParameterVector{}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto result = backend->create_tensor(element::i64, shape); + + backend->call_with_validate(f, {result}, {}); + EXPECT_EQ((vector{2112, 1848, 1776, 1964}), read_vector(result)); +} + +// TODO: Kahan sum only works in limited cases with CPU / Interpreter backend +NGRAPH_TEST(${BACKEND_NAME}, kahan_sum_to_scalar) +{ + Shape shape{2, 2}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + float epsilon = 9.5367431640625e-7f; + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{epsilon, -1.f, 0.f, 1.f}); + auto result = backend->create_tensor(element::f32, Shape{}); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_TRUE(test::all_close_f(vector{epsilon}, read_vector(result))); +} + +// TODO: Kahan sum only works in limited cases with CPU / Interpreter backend +NGRAPH_TEST(${BACKEND_NAME}, kahan_sum_3d_to_vector) +{ + Shape shape_a{3, 3, 3}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3}; + auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + float epsilon_a = 1.220703125e-4f; + float epsilon_b = 3.0517578125e-5f; + float epsilon_c = 7.62939453125e-6f; + copy_data(a, vector{1, 1, 1, 1, 1, 1, epsilon_a, epsilon_b, epsilon_c, + 1, 1, 1, 1, 1, 1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1}); + auto result = 
backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_TRUE(test::all_close_f(vector{epsilon_a, epsilon_b, epsilon_c}, + read_vector(result))); +} + +NGRAPH_TEST(${BACKEND_NAME}, constant_equality_bool) +{ + Shape shape{4}; + // auto A = make_shared(element::boolean, shape); + // auto B = make_shared(element::boolean, shape); + // auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); + + auto A = op::Constant::create(element::boolean, shape, {true, false, true, false}); + auto B = op::Constant::create(element::boolean, shape, {true, true, true, true}); + auto f = make_shared(make_shared(A, B), op::ParameterVector{}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto result = backend->create_tensor(element::boolean, shape); + + backend->call_with_validate(f, {result}, {}); + EXPECT_EQ((vector{true, false, true, false}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, replace_slice_scalar) +{ + Shape shape_a{}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{}; + auto r = make_shared(A, B, Coordinate{}, Coordinate{}); + auto f = make_shared(r, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{312}); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{808}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((vector{808}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, replace_slice_matrix_inplace) +{ + Shape shape_a{4, 4}; + auto A = make_shared(element::f32, shape_a); + auto abs_A = make_shared(A); + + Shape shape_b{3, 2}; + auto B = make_shared(element::f32, shape_b); + 
Shape shape_r{4, 4}; + auto r = make_shared(abs_A, B, Coordinate{0, 1}, Coordinate{3, 3}); + auto abs_r = make_shared(r); + auto f = make_shared(abs_r, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{102, 103, 106, 107, 110, 111}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((vector{1, 102, 103, 4, 5, 106, 107, 8, 9, 110, 111, 12, 13, 14, 15, 16}), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, replace_slice_matrix) +{ + Shape shape_a{4, 4}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{3, 2}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{4, 4}; + auto r = make_shared(A, B, Coordinate{0, 1}, Coordinate{3, 3}); + auto f = make_shared(r, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{102, 103, 106, 107, 110, 111}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((vector{1, 102, 103, 4, 5, 106, 107, 8, 9, 110, 111, 12, 13, 14, 15, 16}), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, replace_slice_vector) +{ + Shape shape_a{16}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{12}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{16}; + auto r = make_shared(A, B, Coordinate{2}, Coordinate{14}); + auto f = make_shared(r, 
op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ( + (vector{0, 1, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 14, 15}), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, replace_slice_3d) +{ + Shape shape_a{4, 4, 4}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{2, 2, 2}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{4, 4, 4}; + auto r = make_shared(A, B, Coordinate{1, 1, 1}, Coordinate{3, 3, 3}); + auto f = make_shared(r, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{921, 922, 925, 926, 937, 938, 941, 942}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + + 16, 17, 18, 19, 20, 921, 922, 23, 24, 925, 926, 27, 28, 29, 30, 31, + + 32, 33, 34, 35, 36, 937, 938, 39, 40, 941, 942, 43, 44, 45, 46, 47, + + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), + 
read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, replace_slice_3d_strided) +{ + Shape shape_a{4, 4, 4}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{2, 2, 2}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{4, 4, 4}; + auto r = make_shared( + A, B, Coordinate{0, 0, 0}, Coordinate{4, 4, 4}, Strides{2, 2, 2}); + auto f = make_shared(r, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{900, 902, 908, 910, 932, 934, 940, 942}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((vector{900, 1, 902, 3, 4, 5, 6, 7, 908, 9, 910, 11, 12, 13, 14, 15, + + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + + 932, 33, 934, 35, 36, 37, 38, 39, 940, 41, 942, 43, 44, 45, 46, 47, + + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, replace_slice_3d_strided_different_strides) +{ + Shape shape_a{4, 4, 4}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{2, 2, 2}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{4, 4, 4}; + auto r = make_shared( + A, B, Coordinate{0, 0, 0}, Coordinate{4, 4, 4}, Strides{2, 2, 3}); + auto f = make_shared(r, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{900, 903, 908, 911, 932, 935, 940, 943}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((vector{900, 1, 2, 903, 4, 5, 6, 7, 908, 9, 10, 911, 12, 13, 14, 15, + + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + + 932, 33, 34, 935, 36, 37, 38, 39, 940, 41, 42, 943, 44, 45, 46, 47, + + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_0d) +{ + Shape shape{}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{6}); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{6}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_1d_nochange) +{ + Shape shape{8}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7}); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{0, 1, 2, 3, 4, 5, 6, 7}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_1d_0) +{ + Shape 
shape{8}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7}); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{7, 6, 5, 4, 3, 2, 1, 0}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_2d_nochange) +{ + Shape shape{4, 3}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, + test::NDArray({{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}).get_vector()); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ( + (test::NDArray({{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}).get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_2d_0) +{ + Shape shape{4, 3}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, + test::NDArray({{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}).get_vector()); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ( + (test::NDArray({{9, 10, 11}, {6, 7, 8}, {3, 4, 5}, {0, 1, 2}}).get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_2d_1) +{ + Shape shape{4, 3}; + auto A = 
make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, + test::NDArray({{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}).get_vector()); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ( + (test::NDArray({{2, 1, 0}, {5, 4, 3}, {8, 7, 6}, {11, 10, 9}}).get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_2d_01) +{ + Shape shape{4, 3}; + auto A = make_shared(element::f32, shape); + auto f = + make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, + test::NDArray({{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}).get_vector()); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ( + (test::NDArray({{11, 10, 9}, {8, 7, 6}, {5, 4, 3}, {2, 1, 0}}).get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_nochange) +{ + Shape shape{2, 4, 3}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, + test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, + {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) + .get_vector()); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, + 
{{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) + .get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_0) +{ + Shape shape{2, 4, 3}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, + test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, + {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) + .get_vector()); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((test::NDArray({{{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}, + {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}}) + .get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_1) +{ + Shape shape{2, 4, 3}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, + test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, + {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) + .get_vector()); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((test::NDArray({{{9, 10, 11}, {6, 7, 8}, {3, 4, 5}, {0, 1, 2}}, + {{21, 22, 23}, {18, 19, 20}, {15, 16, 17}, {12, 13, 14}}}) + .get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_2) +{ + Shape shape{2, 4, 3}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{2}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create 
some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, + test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, + {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) + .get_vector()); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((test::NDArray({{{2, 1, 0}, {5, 4, 3}, {8, 7, 6}, {11, 10, 9}}, + {{14, 13, 12}, {17, 16, 15}, {20, 19, 18}, {23, 22, 21}}}) + .get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_01) +{ + Shape shape{2, 4, 3}; + auto A = make_shared(element::f32, shape); + auto f = + make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, + test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, + {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) + .get_vector()); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((test::NDArray({{{21, 22, 23}, {18, 19, 20}, {15, 16, 17}, {12, 13, 14}}, + {{9, 10, 11}, {6, 7, 8}, {3, 4, 5}, {0, 1, 2}}}) + .get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_02) +{ + Shape shape{2, 4, 3}; + auto A = make_shared(element::f32, shape); + auto f = + make_shared(make_shared(A, AxisSet{0, 2}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, + test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, + {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) + .get_vector()); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + 
EXPECT_EQ((test::NDArray({{{14, 13, 12}, {17, 16, 15}, {20, 19, 18}, {23, 22, 21}}, + {{2, 1, 0}, {5, 4, 3}, {8, 7, 6}, {11, 10, 9}}}) + .get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_12) +{ + Shape shape{2, 4, 3}; + auto A = make_shared(element::f32, shape); + auto f = + make_shared(make_shared(A, AxisSet{1, 2}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, + test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, + {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) + .get_vector()); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((test::NDArray({{{11, 10, 9}, {8, 7, 6}, {5, 4, 3}, {2, 1, 0}}, + {{23, 22, 21}, {20, 19, 18}, {17, 16, 15}, {14, 13, 12}}}) + .get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_012) +{ + Shape shape{2, 4, 3}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{0, 1, 2}), + op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, + test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, + {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) + .get_vector()); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((test::NDArray({{{23, 22, 21}, {20, 19, 18}, {17, 16, 15}, {14, 13, 12}}, + {{11, 10, 9}, {8, 7, 6}, {5, 4, 3}, {2, 1, 0}}}) + .get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, numeric_float_nan) +{ + Shape shape{5}; + auto A = op::Constant::create(element::f32, shape, {-2.5f, 25.5f, 2.25f, NAN, 6.0f}); + auto B = 
op::Constant::create(element::f32, shape, {10.0f, 5.0f, 2.25f, 10.0f, NAN}); + auto f = make_shared(make_shared(A, B), op::ParameterVector{}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto result = backend->create_tensor(element::boolean, shape); + backend->call_with_validate(f, {result}, {}); + EXPECT_EQ((vector{false, false, true, false, false}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, numeric_double_nan) +{ + Shape shape{5}; + auto A = op::Constant::create(element::f64, shape, {-2.5f, 25.5f, 2.25f, NAN, 6.0f}); + auto B = op::Constant::create(element::f64, shape, {10.0f, 5.0f, 2.25f, 10.0f, NAN}); + auto f = make_shared(make_shared(A, B), op::ParameterVector{}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto result = backend->create_tensor(element::boolean, shape); + backend->call_with_validate(f, {result}, {}); + EXPECT_EQ((vector{false, false, true, false, false}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, numeric_float_inf) +{ + Shape shape{5}; + auto A = op::Constant::create(element::f32, shape, {-2.5f, 25.5f, 2.25f, INFINITY, 6.0f}); + auto B = op::Constant::create(element::f32, shape, {10.0f, 5.0f, 2.25f, 10.0f, -INFINITY}); + auto f = make_shared(make_shared(A, B), op::ParameterVector{}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto result = backend->create_tensor(element::boolean, shape); + backend->call_with_validate(f, {result}, {}); + EXPECT_EQ((vector{false, false, true, false, false}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, numeric_double_inf) +{ + Shape shape{5}; + auto A = op::Constant::create(element::f64, shape, {-2.5f, 25.5f, 2.25f, INFINITY, 6.0f}); + auto B = op::Constant::create(element::f64, shape, {10.0f, 5.0f, 2.25f, 10.0f, -INFINITY}); + auto f = make_shared(make_shared(A, B), 
op::ParameterVector{}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto result = backend->create_tensor(element::boolean, shape); + backend->call_with_validate(f, {result}, {}); + EXPECT_EQ((vector{false, false, true, false, false}), read_vector(result)); +} + +// +// From the XLA docs: https://www.tensorflow.org/performance/xla/operation_semantics#selectandscatter +// +NGRAPH_TEST(${BACKEND_NAME}, select_and_scatter_with_overlap) +{ + Shape shape_sel_a{}; + auto SEL_A = make_shared(element::f32, shape_sel_a); + Shape shape_sel_b{}; + auto SEL_B = make_shared(element::f32, shape_sel_b); + auto sel_f = make_shared(make_shared(SEL_A, SEL_B), + op::ParameterVector{SEL_A, SEL_B}); + + Shape shape_scatter_a{}; + auto SCATTER_A = make_shared(element::f32, shape_scatter_a); + Shape shape_scatter_b{}; + auto SCATTER_B = make_shared(element::f32, shape_scatter_b); + auto scatter_f = + make_shared(SCATTER_A + SCATTER_B, op::ParameterVector{SCATTER_A, SCATTER_B}); + + Shape shape_a{4, 5}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{2, 2}; + auto B = make_shared(element::f32, shape_b); + Shape shape_c{}; + auto C = make_shared(element::f32, shape_c); + Shape shape_r{4, 5}; + Shape window_shape{2, 3}; + auto window_strides = Strides{2, 2}; + auto f = make_shared( + make_shared(A, B, C, sel_f, scatter_f, window_shape, window_strides), + op::ParameterVector{A, B, C}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, + test::NDArray( + {{7, 2, 5, 3, 8}, {3, 8, 9, 3, 4}, {1, 5, 7, 5, 6}, {0, 6, 2, 10, 2}}) + .get_vector()); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, test::NDArray({{2, 6}, {3, 1}}).get_vector()); + auto c = backend->create_tensor(element::f32, shape_c); + copy_data(c, vector{0}); + auto result = 
backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b, c}); + EXPECT_EQ((test::NDArray( + {{0, 0, 0, 0, 0}, {0, 0, 8, 0, 0}, {0, 0, 3, 0, 0}, {0, 0, 0, 1, 0}}) + .get_vector()), + read_vector(result)); +} + +// +// From the XLA docs: https://www.tensorflow.org/performance/xla/operation_semantics#selectandscatter +// +NGRAPH_TEST(${BACKEND_NAME}, select_and_scatter_without_overlap) +{ + Shape shape_sel_a{}; + auto SEL_A = make_shared(element::f32, shape_sel_a); + Shape shape_sel_b{}; + auto SEL_B = make_shared(element::f32, shape_sel_b); + auto sel_f = make_shared(make_shared(SEL_A, SEL_B), + op::ParameterVector{SEL_A, SEL_B}); + + Shape shape_scatter_a{}; + auto SCATTER_A = make_shared(element::f32, shape_scatter_a); + Shape shape_scatter_b{}; + auto SCATTER_B = make_shared(element::f32, shape_scatter_b); + auto scatter_f = + make_shared(SCATTER_A + SCATTER_B, op::ParameterVector{SCATTER_A, SCATTER_B}); + + Shape shape_a{4, 6}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{2, 2}; + auto B = make_shared(element::f32, shape_b); + Shape shape_c{}; + auto C = make_shared(element::f32, shape_c); + Shape shape_r{4, 6}; + Shape window_shape{2, 3}; + auto window_strides = Strides{2, 3}; + auto f = make_shared( + make_shared(A, B, C, sel_f, scatter_f, window_shape, window_strides), + op::ParameterVector{A, B, C}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, + test::NDArray( + {{7, 2, 5, 3, 10, 2}, {3, 8, 9, 3, 4, 2}, {1, 5, 7, 5, 6, 1}, {0, 6, 2, 7, 2, 8}}) + .get_vector()); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, test::NDArray({{2, 6}, {3, 1}}).get_vector()); + auto c = backend->create_tensor(element::f32, shape_c); + copy_data(c, vector{0}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, 
{result}, {a, b, c}); + EXPECT_EQ((test::NDArray( + {{0, 0, 0, 0, 6, 0}, {0, 0, 2, 0, 0, 0}, {0, 0, 3, 0, 0, 0}, {0, 0, 0, 0, 0, 1}}) + .get_vector()), + read_vector(result)); +} + +// +// Adapted from the XLA docs to provide an example in >2D: https://www.tensorflow.org/performance/xla/operation_semantics#selectandscatter +// +NGRAPH_TEST(${BACKEND_NAME}, select_and_scatter_3d_without_overlap) +{ + Shape shape_sel_a{}; + auto SEL_A = make_shared(element::f32, shape_sel_a); + Shape shape_sel_b{}; + auto SEL_B = make_shared(element::f32, shape_sel_b); + auto sel_f = make_shared(make_shared(SEL_A, SEL_B), + op::ParameterVector{SEL_A, SEL_B}); + + Shape shape_scatter_a{}; + auto SCATTER_A = make_shared(element::f32, shape_scatter_a); + Shape shape_scatter_b{}; + auto SCATTER_B = make_shared(element::f32, shape_scatter_b); + auto scatter_f = + make_shared(SCATTER_A + SCATTER_B, op::ParameterVector{SCATTER_A, SCATTER_B}); + + Shape shape_a{2, 4, 6}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{1, 2, 2}; + auto B = make_shared(element::f32, shape_b); + Shape shape_c{}; + auto C = make_shared(element::f32, shape_c); + Shape shape_r{2, 4, 6}; + Shape window_shape{2, 2, 3}; + auto window_strides = Strides{2, 2, 3}; + auto f = make_shared( + make_shared(A, B, C, sel_f, scatter_f, window_shape, window_strides), + op::ParameterVector{A, B, C}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data( + a, + test::NDArray( + {{{7, 2, 5, 3, 10, 2}, {3, 8, 9, 3, 4, 2}, {1, 5, 7, 5, 6, 1}, {0, 6, 2, 7, 2, 8}}, + {{2, 5, 8, 3, 4, 2}, {1, 2, 8, 4, 5, 2}, {10, 2, 3, 4, 1, 0}, {4, 1, 2, 4, 5, 7}}}) + .get_vector()); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, test::NDArray({{{2, 6}, {3, 1}}}).get_vector()); + auto c = backend->create_tensor(element::f32, shape_c); + copy_data(c, vector{0}); + auto result = 
backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b, c}); + EXPECT_EQ( + (test::NDArray( + {{{0, 0, 0, 0, 6, 0}, {0, 0, 2, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 1}}, + {{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {3, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}}}) + .get_vector()), + read_vector(result)); +} + +template +void make_unary_empty_test(const string& backend_name) +{ + Shape shape{0}; + + op::ParameterVector params; + NodeVector result_list; + for (size_t i = 0; i < s_known_element_types.size(); i++) + { + shared_ptr p = make_shared(s_known_element_types[i], shape); + params.push_back(p); + result_list.push_back(make_shared(p)); + } + + auto f = make_shared(result_list, params); + auto backend = runtime::Backend::create(backend_name); + + vector> inputs; + vector> outputs; + for (size_t i = 0; i < s_known_element_types.size(); i++) + { + inputs.push_back(backend->create_tensor(s_known_element_types[i], shape)); + outputs.push_back(backend->create_tensor(s_known_element_types[i], shape)); + } + + backend->call_with_validate(f, outputs, inputs); + + EXPECT_EQ(read_vector(inputs[0]).size(), 0); + EXPECT_EQ(read_vector(inputs[1]).size(), 0); + EXPECT_EQ(read_vector(inputs[2]).size(), 0); + EXPECT_EQ(read_vector(inputs[3]).size(), 0); + EXPECT_EQ(read_vector(inputs[4]).size(), 0); + EXPECT_EQ(read_vector(inputs[5]).size(), 0); + EXPECT_EQ(read_vector(inputs[6]).size(), 0); + EXPECT_EQ(read_vector(inputs[7]).size(), 0); + EXPECT_EQ(read_vector(inputs[8]).size(), 0); + EXPECT_EQ(read_vector(inputs[9]).size(), 0); + + EXPECT_EQ(read_vector(outputs[0]).size(), 0); + EXPECT_EQ(read_vector(outputs[1]).size(), 0); + EXPECT_EQ(read_vector(outputs[2]).size(), 0); + EXPECT_EQ(read_vector(outputs[3]).size(), 0); + EXPECT_EQ(read_vector(outputs[4]).size(), 0); + EXPECT_EQ(read_vector(outputs[5]).size(), 0); + EXPECT_EQ(read_vector(outputs[6]).size(), 0); + EXPECT_EQ(read_vector(outputs[7]).size(), 0); + 
EXPECT_EQ(read_vector(outputs[8]).size(), 0); + EXPECT_EQ(read_vector(outputs[9]).size(), 0); +} + +template +void make_binary_empty_test(const string& backend_name, bool is_comparison = false) +{ + Shape shape{0}; + op::ParameterVector A; + for (size_t i = 0; i < s_known_element_types.size(); i++) + { + A.push_back(make_shared(s_known_element_types[i], shape)); + } + + NodeVector result_list; + for (shared_ptr p : A) + { + result_list.push_back(make_shared(p, p)); + } + + auto f = make_shared(result_list, A); + auto backend = runtime::Backend::create(backend_name); + + vector> inputs; + vector> outputs; + for (size_t i = 0; i < s_known_element_types.size(); i++) + { + inputs.push_back(backend->create_tensor(s_known_element_types[i], shape)); + if (is_comparison) + { + outputs.push_back(backend->create_tensor(element::from(), shape)); + } + else + { + outputs.push_back(backend->create_tensor(s_known_element_types[i], shape)); + } + } + + backend->call_with_validate(f, outputs, inputs); + + EXPECT_EQ(read_vector(inputs[0]).size(), 0); + EXPECT_EQ(read_vector(inputs[1]).size(), 0); + EXPECT_EQ(read_vector(inputs[2]).size(), 0); + EXPECT_EQ(read_vector(inputs[3]).size(), 0); + EXPECT_EQ(read_vector(inputs[4]).size(), 0); + EXPECT_EQ(read_vector(inputs[5]).size(), 0); + EXPECT_EQ(read_vector(inputs[6]).size(), 0); + EXPECT_EQ(read_vector(inputs[7]).size(), 0); + EXPECT_EQ(read_vector(inputs[8]).size(), 0); + EXPECT_EQ(read_vector(inputs[9]).size(), 0); + + if (is_comparison) + { + EXPECT_EQ(read_vector(outputs[0]).size(), 0); + EXPECT_EQ(read_vector(outputs[1]).size(), 0); + EXPECT_EQ(read_vector(outputs[2]).size(), 0); + EXPECT_EQ(read_vector(outputs[3]).size(), 0); + EXPECT_EQ(read_vector(outputs[4]).size(), 0); + EXPECT_EQ(read_vector(outputs[5]).size(), 0); + EXPECT_EQ(read_vector(outputs[6]).size(), 0); + EXPECT_EQ(read_vector(outputs[7]).size(), 0); + EXPECT_EQ(read_vector(outputs[8]).size(), 0); + EXPECT_EQ(read_vector(outputs[9]).size(), 0); + } + else + { + 
EXPECT_EQ(read_vector(outputs[0]).size(), 0); + EXPECT_EQ(read_vector(outputs[1]).size(), 0); + EXPECT_EQ(read_vector(outputs[2]).size(), 0); + EXPECT_EQ(read_vector(outputs[3]).size(), 0); + EXPECT_EQ(read_vector(outputs[4]).size(), 0); + EXPECT_EQ(read_vector(outputs[5]).size(), 0); + EXPECT_EQ(read_vector(outputs[6]).size(), 0); + EXPECT_EQ(read_vector(outputs[7]).size(), 0); + EXPECT_EQ(read_vector(outputs[8]).size(), 0); + EXPECT_EQ(read_vector(outputs[9]).size(), 0); + } +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_abs) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_ceiling) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_exp) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_floor) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_log) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_negative) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_not) +{ + Shape shape{0}; + auto A = make_shared(element::from(), shape); + auto f = make_shared(make_shared(A), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::from(), shape); + auto result = backend->create_tensor(element::from(), shape); + + backend->call_with_validate(f, {result}, {a}); + + auto in_vec = read_vector(a); + auto out_vec = read_vector(result); + + EXPECT_EQ(in_vec.size(), 0); + EXPECT_EQ(out_vec.size(), 0); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_sign) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_sqrt) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_sin) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_sinh) +{ + 
make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_cos) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_cosh) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_tan) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_tanh) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_asin) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_acos) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_atan) +{ + make_unary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_add) +{ + make_binary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_divide) +{ + make_binary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_eq) +{ + make_binary_empty_test("${BACKEND_NAME}", true); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_greater) +{ + make_binary_empty_test("${BACKEND_NAME}", true); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_greatereq) +{ + make_binary_empty_test("${BACKEND_NAME}", true); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_less) +{ + make_binary_empty_test("${BACKEND_NAME}", true); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_lesseq) +{ + make_binary_empty_test("${BACKEND_NAME}", true); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_maximum) +{ + make_binary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_minimum) +{ + make_binary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_multiply) +{ + make_binary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_not_equal) +{ + make_binary_empty_test("${BACKEND_NAME}", true); +} + +NGRAPH_TEST(${BACKEND_NAME}, zero_sized_power) +{ + make_binary_empty_test("${BACKEND_NAME}"); +} + 
+NGRAPH_TEST(${BACKEND_NAME}, zero_sized_subtract) +{ + make_binary_empty_test("${BACKEND_NAME}"); +} + +NGRAPH_TEST(${BACKEND_NAME}, convolution_outlining) +{ + Shape shape_a{1, 2, 2, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{2, 2, 1, 1}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{1, 2, 2, 2}; + auto conv1 = make_shared(A, + B, + Strides{1, 1}, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + auto conv2 = make_shared(conv1, + B, + Strides{1, 1}, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + auto f = make_shared(conv2, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{1.0f, 1.0f, 1.0f, 1.0f}); + auto result = backend->create_tensor(element::f32, shape_r); + + vector expected_result{4.0f, 4.0f, 4.0f, 4.0f, 4.0f, 4.0f, 4.0f, 4.0f}; + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ(vector{expected_result}, read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, computation_reuse) +{ + Shape shape_a{1, 16, 2, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{32, 16, 1, 1}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{1, 32, 2, 2}; + auto conv = make_shared(A, + B, + Strides{1, 1}, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + Shape pool_shape{1, 1}; + auto pool = make_shared(conv, pool_shape); + auto bias = make_shared( + op::Constant::create(element::f32, Shape{}, {2.14}), shape_r, AxisSet{0, 1, 2, 3}); + auto result_op = make_shared(pool + bias); + auto f = make_shared(ResultVector{result_op}, op::ParameterVector{A, B}); + + auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); + + vector input(64, 1.0f); + vector weights(512, 0.5f); + vector rv(128); + + auto a = backend->create_tensor(element::f32, shape_a, input.data()); + auto b = backend->create_tensor(element::f32, shape_b, weights.data()); + auto result = backend->create_tensor(element::f32, shape_r, rv.data()); + + backend->call_with_validate(f, {result}, {a, b}); + + vector rv_saved(rv); + + b->set_stale(false); + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ(rv_saved, rv); +} + +NGRAPH_TEST(${BACKEND_NAME}, pad_interior_1d) +{ + Shape shape_a{6}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{16}; + Shape padding_below{0}; + Shape padding_above{0}; + Shape padding_interior{2}; + auto f = make_shared( + make_shared(A, B, padding_below, padding_above, padding_interior), + op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, test::NDArray({1, 2, 3, 4, 5, 6}).get_vector()); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{2112}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((test::NDArray( + {1, 2112, 2112, 2, 2112, 2112, 3, 2112, 2112, 4, 2112, 2112, 5, 2112, 2112, 6}) + .get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, pad_exterior_1d) +{ + Shape shape_a{6}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{15}; + Shape padding_below{4}; + Shape padding_above{5}; + Shape padding_interior{0}; + auto f = make_shared( + make_shared(A, B, padding_below, padding_above, padding_interior), + op::ParameterVector{A, B}); + + auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, test::NDArray({1, 2, 3, 4, 5, 6}).get_vector()); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{2112}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((test::NDArray( + {2112, 2112, 2112, 2112, 1, 2, 3, 4, 5, 6, 2112, 2112, 2112, 2112, 2112}) + .get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, pad_interior_exterior_1d) +{ + Shape shape_a{6}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{25}; + Shape padding_below{4}; + Shape padding_above{5}; + Shape padding_interior{2}; + auto f = make_shared( + make_shared(A, B, padding_below, padding_above, padding_interior), + op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, test::NDArray({1, 2, 3, 4, 5, 6}).get_vector()); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{2112}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((test::NDArray({2112, 2112, 2112, 2112, 1, 2112, 2112, 2, 2112, + 2112, 3, 2112, 2112, 4, 2112, 2112, 5, 2112, + 2112, 6, 2112, 2112, 2112, 2112, 2112}) + .get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, pad_interior_exterior_2d) +{ + Shape shape_a{2, 3}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{7, 6}; + Shape padding_below{1, 0}; + Shape padding_above{2, 1}; + Shape padding_interior{2, 1}; + auto f = make_shared( + make_shared(A, B, padding_below, 
padding_above, padding_interior), + op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, test::NDArray({{1, 2, 3}, {4, 5, 6}}).get_vector()); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{9}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((test::NDArray({{9, 9, 9, 9, 9, 9}, + {1, 9, 2, 9, 3, 9}, + {9, 9, 9, 9, 9, 9}, + {9, 9, 9, 9, 9, 9}, + {4, 9, 5, 9, 6, 9}, + {9, 9, 9, 9, 9, 9}, + {9, 9, 9, 9, 9, 9}}) + .get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, pad_exterior_2d_0x0) +{ + Shape shape_a{0, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{5, 5}; + Shape padding_below{2, 3}; + Shape padding_above{3, 2}; + Shape padding_interior{0, 0}; + auto f = make_shared( + make_shared(A, B, padding_below, padding_above, padding_interior), + op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + // copy_data(a, test::NDArray({{}}).get_vector()); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{2112}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((test::NDArray({{2112, 2112, 2112, 2112, 2112}, + {2112, 2112, 2112, 2112, 2112}, + {2112, 2112, 2112, 2112, 2112}, + {2112, 2112, 2112, 2112, 2112}, + {2112, 2112, 2112, 2112, 2112}}) + .get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, pad_exterior_2d_0x3) +{ + Shape shape_a{0, 3}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{}; + auto B = 
make_shared(element::f32, shape_b); + Shape shape_r{5, 5}; + Shape padding_below{2, 1}; + Shape padding_above{3, 1}; + Shape padding_interior{0, 0}; + auto f = make_shared( + make_shared(A, B, padding_below, padding_above, padding_interior), + op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + // copy_data(a, test::NDArray({}).get_vector()); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{2112}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((test::NDArray({{2112, 2112, 2112, 2112, 2112}, + {2112, 2112, 2112, 2112, 2112}, + {2112, 2112, 2112, 2112, 2112}, + {2112, 2112, 2112, 2112, 2112}, + {2112, 2112, 2112, 2112, 2112}}) + .get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, pad_exterior_2d_3x0) +{ + Shape shape_a{3, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{5, 5}; + Shape padding_below{1, 3}; + Shape padding_above{1, 2}; + Shape padding_interior{0, 0}; + auto f = make_shared( + make_shared(A, B, padding_below, padding_above, padding_interior), + op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + // copy_data(a, test::NDArray({}).get_vector()); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{2112}); + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((test::NDArray({{2112, 2112, 2112, 2112, 2112}, + {2112, 2112, 2112, 2112, 2112}, + {2112, 2112, 2112, 2112, 2112}, + {2112, 2112, 2112, 2112, 2112}, + {2112, 2112, 2112, 2112, 2112}}) + 
.get_vector()), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, pad_exterior_4d_1x2x2x2) +{ + Shape shape_a{1, 2, 2, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{1, 2, 4, 4}; + Shape padding_below{0, 0, 1, 1}; + Shape padding_above{0, 0, 1, 1}; + Shape padding_interior{0, 0, 0, 0}; + auto f = make_shared( + make_shared(A, B, padding_below, padding_above, padding_interior), + op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + // clang-format off + copy_data(a, test::NDArray( + { + { + { + {0.0f, 0.0f}, + {0.0f, 0.0f} + }, + { + {0.0f, 0.0f}, + {0.0f, 0.0f} + } + } + }).get_vector()); + // clang-format on + + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{42}); + + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + // clang-format off + EXPECT_EQ((test::NDArray( + { + { + { + {42.0f, 42.0f, 42.0f, 42.0f}, + {42.0f, 0.0f, 0.0f, 42.0f}, + {42.0f, 0.0f, 0.0f, 42.0f}, + {42.0f, 42.0f, 42.0f, 42.0f} + }, + { + {42.0f, 42.0f, 42.0f, 42.0f}, + {42.0f, 0.0f, 0.0f, 42.0f}, + {42.0f, 0.0f, 0.0f, 42.0f}, + {42.0f, 42.0f, 42.0f, 42.0f} + } + } + }).get_vector()), + read_vector(result)); + // clang-format on +} + +// This is a regression test for one of TF's unit tests, which was failing. +// The problem was inappropriate handling of the shape computation for a +// zero-length axis with interior padding. Rather than subtract 1 from the +// source shape and multiply by the interior padding (which causes underflow), +// we should just count the pre-interior-padding length as zero. 
+NGRAPH_TEST(${BACKEND_NAME}, pad_interior_exterior_4d_2x0x3x2) +{ + Shape shape_a{2, 0, 3, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{}; + auto B = make_shared(element::f32, shape_b); + Shape padding_below{1, 0, 0, 0}; + Shape padding_above{0, 2, 0, 0}; + Shape padding_interior{2, 1, 0, 0}; + Shape shape_r{5, 2, 3, 2}; + auto f = make_shared( + make_shared(A, B, padding_below, padding_above, padding_interior), + op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + // copy_data(a, test::NDArray({}).get_vector()); + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{2112}); + auto result = backend->create_tensor(element::f32, shape_r); + + vector expected(5 * 2 * 3 * 2, 2112); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ(expected, read_vector(result)); +} + +// This test covers the case with multiple image and with asymetric pad +// bug has been found on nvGPU side now covered by this test +NGRAPH_TEST(${BACKEND_NAME}, pad_2channel_2image_asym) +{ + Shape shape_a{2, 2, 4, 4}; + auto window_movement_strides = Strides{2, 2}; + Shape padding_below{0, 0, 0, 0}; + Shape padding_above{0, 0, 2, 2}; + Shape padding_interior{0, 0, 0, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{2, 2, 6, 6}; + auto f = make_shared( + make_shared(A, B, padding_below, padding_above, padding_interior), + op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, + test::NDArray({{{{0, 1, 0, 2}, // img 0 chan 0 + {0, 3, 2, 0}, + {2, 0, 0, 0}, + {0, 2, 1, 0}}, + + {{0, 0, 0, 2}, // img 0 chan 1 + {0, 2, 3, 0}, + {2, 0, 1, 0}, + {2, 0, 0, 0}}}, + + {{{0, 
2, 1, 1}, // img 1 chan 0 + {0, 0, 2, 0}, + {0, 0, 1, 2}, + {0, 0, 0, 0}}, + + {{2, 1, 0, 0}, // img 1 chan 1 + {0, 2, 0, 0}, + {1, 1, 2, 0}, + {1, 0, 0, 0}}}}) + .get_vector()); + + auto b = backend->create_tensor(element::f32, shape_b); + copy_data(b, vector{42}); + + auto result = backend->create_tensor(element::f32, shape_r); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((test::NDArray({{{{0, 1, 0, 2, 42, 42}, // img 0 chan 0 + {0, 3, 2, 0, 42, 42}, + {2, 0, 0, 0, 42, 42}, + {0, 2, 1, 0, 42, 42}, + {42, 42, 42, 42, 42, 42}, + {42, 42, 42, 42, 42, 42}}, + + {{0, 0, 0, 2, 42, 42}, // img 1 chan 0 + {0, 2, 3, 0, 42, 42}, + {2, 0, 1, 0, 42, 42}, + {2, 0, 0, 0, 42, 42}, + {42, 42, 42, 42, 42, 42}, + {42, 42, 42, 42, 42, 42}}}, + + {{{0, 2, 1, 1, 42, 42}, // img 1 chan 0 + {0, 0, 2, 0, 42, 42}, + {0, 0, 1, 2, 42, 42}, + {0, 0, 0, 0, 42, 42}, + {42, 42, 42, 42, 42, 42}, + {42, 42, 42, 42, 42, 42}}, + + {{2, 1, 0, 0, 42, 42}, // img 1 chan 1 + {0, 2, 0, 0, 42, 42}, + {1, 1, 2, 0, 42, 42}, + {1, 0, 0, 0, 42, 42}, + {42, 42, 42, 42, 42, 42}, + {42, 42, 42, 42, 42, 42}}}}) + .get_vector()), + read_vector(result)); +} + +// Trivial case with no reduced axes. 
+NGRAPH_TEST(${BACKEND_NAME}, product_trivial) +{ + Shape shape{2, 2}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 2, 3, 4}); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); +} + +// Failure has been reported at 5D for some reason +NGRAPH_TEST(${BACKEND_NAME}, product_trivial_5d) +{ + Shape shape{2, 2, 2, 2, 2}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, product_to_scalar) +{ + Shape shape{2, 2}; + auto A = make_shared(element::f32, shape); + auto f = + make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 2, 3, 4}); + auto result = backend->create_tensor(element::f32, Shape{}); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{24}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure 
reduction doesn't clobber the + // input tensors, so let's do this too. + EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, product_matrix_columns) +{ + Shape shape_a{3, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{2}; + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{15, 48}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. + EXPECT_EQ((vector{1, 2, 3, 4, 5, 6}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, product_matrix_rows) +{ + Shape shape_a{3, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3}; + auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{2, 12, 30}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. 
+ EXPECT_EQ((vector{1, 2, 3, 4, 5, 6}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, product_matrix_rows_zero) +{ + Shape shape_a{3, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3}; + auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3, 3, 3})); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 1, 1}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. + EXPECT_EQ((vector{}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, product_matrix_cols_zero) +{ + // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={})). + Shape shape_a{0, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{2}; + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3, 3})); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 1}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. 
+ EXPECT_EQ((vector{}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, product_vector_zero) +{ + Shape shape_a{0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{}; + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3})); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. + EXPECT_EQ((vector{}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, product_matrix_to_scalar_zero_by_zero) +{ + Shape shape_a{0, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{}; + auto f = + make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3})); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. 
+ EXPECT_EQ((vector{}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, product_3d_to_matrix_most_sig) +{ + Shape shape_a{3, 3, 3}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3, 3}; + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1 * 10 * 19, + 2 * 11 * 20, + 3 * 12 * 21, + 4 * 13 * 22, + 5 * 14 * 23, + 6 * 15 * 24, + 7 * 16 * 25, + 8 * 17 * 26, + 9 * 18 * 27}), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, product_3d_to_matrix_least_sig) +{ + Shape shape_a{3, 3, 3}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3, 3}; + auto f = make_shared(make_shared(A, AxisSet{2}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1 * 2 * 3, + 4 * 5 * 6, + 7 * 8 * 9, + 10 * 11 * 12, + 13 * 14 * 15, + 16 * 17 * 18, + 19 * 20 * 21, + 22 * 23 * 24, + 25 * 26 * 27}), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, product_3d_to_vector) +{ + Shape shape_a{3, 3, 3}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3}; + auto f = + make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + 
// Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1.0f * 10.0f * 19.0f * 4.0f * 13.0f * 22.0f * 7.0f * 16.0f * 25.0f, + 2.0f * 11.0f * 20.0f * 5.0f * 14.0f * 23.0f * 8.0f * 17.0f * 26.0f, + 3.0f * 12.0f * 21.0f * 6.0f * 15.0f * 24.0f * 9.0f * 18.0f * 27.0f}), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, product_3d_to_scalar) +{ + Shape shape_a{3, 3, 3}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{}; + auto f = make_shared(make_shared(A, AxisSet{0, 1, 2}), + op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_TRUE(test::all_close(vector{1.0f * 10.0f * 9.0f * 4.0f * 13.0f * 6.0f * 7.0f * + 12.0f * 3.0f * 2.0f * 11.0f * 8.0f * 5.0f * 14.0f * + 5.0f * 8.0f * 11.0f * 2.0f * 3.0f * 12.0f * 7.0f * + 6.0f * 13.0f * 4.0f * 9.0f * 10.0f * 1.0f}, + read_vector(result))); +} + +NGRAPH_TEST(${BACKEND_NAME}, product_3d_eliminate_zero_dim) +{ + Shape shape_a{3, 0, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3, 2}; + auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + + // 
Overwrite the initial result vector to make sure we're not just coincidentally getting the right value. + copy_data(result, vector{2112, 2112, 2112, 2112, 2112, 2112}); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 1, 1, 1, 1, 1}), read_vector(result)); +} + +// Trivial case with no reduced axes. +NGRAPH_TEST(${BACKEND_NAME}, max_trivial) +{ + Shape shape{2, 2}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 2, 3, 4}); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); +} + +// Failure has been reported at 5D for some reason +NGRAPH_TEST(${BACKEND_NAME}, max_trivial_5d) +{ + Shape shape{2, 2, 2, 2, 2}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, max_to_scalar) +{ + Shape shape{2, 2}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = 
backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 2, 3, 4}); + auto result = backend->create_tensor(element::f32, Shape{}); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{4}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. + EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, max_matrix_columns) +{ + Shape shape_a{3, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{2}; + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{5, 6}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. + EXPECT_EQ((vector{1, 2, 3, 4, 5, 6}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, max_matrix_rows) +{ + Shape shape_a{3, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3}; + auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{2, 4, 6}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. 
+ EXPECT_EQ((vector{1, 2, 3, 4, 5, 6}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, max_matrix_rows_zero) +{ + Shape shape_a{3, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3}; + auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3, 3, 3})); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{-std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}), + read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. + EXPECT_EQ((vector{}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, max_matrix_cols_zero) +{ + // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={})). + Shape shape_a{0, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{2}; + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3, 3})); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{-std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}), + read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. 
+ EXPECT_EQ((vector{}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, max_vector_zero) +{ + Shape shape_a{0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{}; + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3})); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{-std::numeric_limits::infinity()}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. + EXPECT_EQ((vector{}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, max_matrix_to_scalar_zero_by_zero) +{ + Shape shape_a{0, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{}; + auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3})); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{-std::numeric_limits::infinity()}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. 
+ EXPECT_EQ((vector{}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, max_3d_to_matrix_most_sig) +{ + Shape shape_a{3, 3, 3}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3, 3}; + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{19, 20, 21, 22, 23, 24, 25, 26, 27}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, max_3d_to_matrix_least_sig) +{ + Shape shape_a{3, 3, 3}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3, 3}; + auto f = make_shared(make_shared(A, AxisSet{2}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{3, 6, 9, 12, 15, 18, 21, 24, 27}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, max_3d_to_vector) +{ + Shape shape_a{3, 3, 3}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3}; + auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 
26, 27}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{25.0f, 26.0f, 27.0f}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, max_3d_to_scalar) +{ + Shape shape_a{3, 3, 3}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{}; + auto f = + make_shared(make_shared(A, AxisSet{0, 1, 2}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{14.0f}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, max_3d_eliminate_zero_dim) +{ + Shape shape_a{3, 0, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3, 2}; + auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + + // Overwrite the initial result vector to make sure we're not just coincidentally getting the right value. + copy_data(result, vector{2112, 2112, 2112, 2112, 2112, 2112}); + + float mi = -std::numeric_limits::infinity(); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{mi, mi, mi, mi, mi, mi}), read_vector(result)); +} + +// Trivial case with no reduced axes. 
+NGRAPH_TEST(${BACKEND_NAME}, min_trivial) +{ + Shape shape{2, 2}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 2, 3, 4}); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); +} + +// Failure has been reported at 5D for some reason +NGRAPH_TEST(${BACKEND_NAME}, min_trivial_5d) +{ + Shape shape{2, 2, 2, 2, 2}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), + read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, min_to_scalar) +{ + Shape shape{2, 2}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{1, 2, 3, 4}); + auto result = backend->create_tensor(element::f32, Shape{}); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't 
clobber the + // input tensors, so let's do this too. + EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, min_matrix_columns) +{ + Shape shape_a{3, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{2}; + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 2}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. + EXPECT_EQ((vector{1, 2, 3, 4, 5, 6}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, min_matrix_rows) +{ + Shape shape_a{3, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3}; + auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 3, 5}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. 
+ EXPECT_EQ((vector{1, 2, 3, 4, 5, 6}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, min_matrix_rows_zero) +{ + Shape shape_a{3, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3}; + auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3, 3, 3})); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + std::numeric_limits::infinity()}), + read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. + EXPECT_EQ((vector{}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, min_matrix_cols_zero) +{ + // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={})). + Shape shape_a{0, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{2}; + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3, 3})); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{std::numeric_limits::infinity(), + std::numeric_limits::infinity()}), + read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. 
+ EXPECT_EQ((vector{}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, min_vector_zero) +{ + Shape shape_a{0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{}; + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3})); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{std::numeric_limits::infinity()}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. + EXPECT_EQ((vector{}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, min_matrix_to_scalar_zero_by_zero) +{ + Shape shape_a{0, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{}; + auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3})); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{std::numeric_limits::infinity()}), read_vector(result)); + + // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the + // input tensors, so let's do this too. 
+ EXPECT_EQ((vector{}), read_vector(a)); +} + +NGRAPH_TEST(${BACKEND_NAME}, min_3d_to_matrix_most_sig) +{ + Shape shape_a{3, 3, 3}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3, 3}; + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 2, 3, 4, 5, 6, 7, 8, 9}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, min_3d_to_matrix_least_sig) +{ + Shape shape_a{3, 3, 3}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3, 3}; + auto f = make_shared(make_shared(A, AxisSet{2}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 4, 7, 10, 13, 16, 19, 22, 25}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, min_3d_to_vector) +{ + Shape shape_a{3, 3, 3}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3}; + auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); + 
auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1, 2, 3}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, min_3d_to_scalar) +{ + Shape shape_a{3, 3, 3}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{}; + auto f = + make_shared(make_shared(A, AxisSet{0, 1, 2}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}); + auto result = backend->create_tensor(element::f32, shape_rt); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{1}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, min_3d_eliminate_zero_dim) +{ + Shape shape_a{3, 0, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3, 2}; + auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + + // Overwrite the initial result vector to make sure we're not just coincidentally getting the right value. 
+ copy_data(result, vector{2112, 2112, 2112, 2112, 2112, 2112}); + + float inf = std::numeric_limits::infinity(); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ((vector{inf, inf, inf, inf, inf, inf}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, sigmoid_n1c1h2w2) +{ + auto input = make_shared(element::f32, Shape{1, 1, 2, 2}); + auto sigmoid_node = make_shared(input); + auto func = make_shared(sigmoid_node, op::ParameterVector{input}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + shared_ptr a = backend->create_tensor(element::f32, input->get_shape()); + shared_ptr result = backend->create_tensor(element::f32, input->get_shape()); + + vector dataA{1.0f, 4.0f, 1.0f, 4.0f}; + copy_data(a, dataA); + + backend->call_with_validate(func, {result}, {a}); + vector expected{0.73105858f, 0.98201379f, 0.73105858f, 0.98201379f}; + ASSERT_TRUE(read_vector(result) == expected); +} + +NGRAPH_TEST(${BACKEND_NAME}, sigmoid_n1c1h4) +{ + auto input = make_shared(element::f32, Shape{1, 1, 4}); + auto sigmoid_node = make_shared(input); + auto func = make_shared(sigmoid_node, op::ParameterVector{input}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + shared_ptr a = backend->create_tensor(element::f32, input->get_shape()); + shared_ptr result = backend->create_tensor(element::f32, input->get_shape()); + + vector dataA{1.0f, 4.0f, 1.0f, 4.0f}; + copy_data(a, dataA); + + backend->call_with_validate(func, {result}, {a}); + vector expected{0.73105858f, 0.98201379f, 0.73105858f, 0.98201379f}; + ASSERT_TRUE(read_vector(result) == expected); +} + +NGRAPH_TEST(${BACKEND_NAME}, sigmoid_bprop_n1c1h4) +{ + auto input = make_shared(element::f32, Shape{1, 1, 4}); + auto delta = make_shared(element::f32, Shape{1, 1, 4}); + auto sigmoid_node = make_shared(input, delta); + auto func = make_shared(sigmoid_node, op::ParameterVector{input, delta}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + shared_ptr a = 
backend->create_tensor(element::f32, input->get_shape()); + shared_ptr b = backend->create_tensor(element::f32, delta->get_shape()); + shared_ptr result = backend->create_tensor(element::f32, input->get_shape()); + + vector dataA{1.0f, 4.0f, 1.0f, 4.0f}; + vector dataB{1.0f, 1.0f, 1.0f, 1.0f}; + + copy_data(a, dataA); + copy_data(b, dataB); + backend->call_with_validate(func, {result}, {a, b}); + + vector expected{0.196612f, 0.0176627f, 0.196612f, 0.0176627f}; + EXPECT_TRUE(test::all_close(expected, read_vector(result))); +} + +NGRAPH_TEST(${BACKEND_NAME}, relu_2Dfprop) +{ + auto shape_a = Shape{2, 5}; + auto A = make_shared(element::f32, shape_a); + auto relu = make_shared(A); + auto shape_rt = Shape{2, 5}; + auto f = make_shared(relu, op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5}); + auto result = backend->create_tensor(element::f32, shape_rt); + vector expected{1, 8, 0, 17, 0, 1, 8, 0, 17, 0}; + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ(read_vector(result), expected); +} + +NGRAPH_TEST(${BACKEND_NAME}, relu_4Dfprop) +{ + auto shape_a = Shape{2, 2, 2, 2}; + auto A = make_shared(element::f32, shape_a); + auto relu = make_shared(A); + auto shape_rt = Shape{2, 2, 2, 2}; + auto f = make_shared(relu, op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5, 1}); + auto result = backend->create_tensor(element::f32, shape_rt); + vector expected{1, 8, 0, 17, 0, 1, 8, 0, 17, 0, 1, 8, 0, 17, 0, 1}; + + backend->call_with_validate(f, {result}, {a}); + EXPECT_EQ(read_vector(result), expected); +} + +NGRAPH_TEST(${BACKEND_NAME}, fuse_max_with_constant_zero_input_as_relu) +{ + auto shape_a = Shape{2, 5}; + auto A = 
op::Constant::create(element::f32, shape_a, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); + auto B = make_shared(element::f32, shape_a); + auto max = make_shared(A, B); + auto shape_rt = Shape{2, 5}; + auto f = make_shared(max, op::ParameterVector{B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto b = backend->create_tensor(element::f32, shape_a); + copy_data(b, vector{1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5}); + auto result = backend->create_tensor(element::f32, shape_rt); + vector expected{1, 8, 0, 17, 0, 1, 8, 0, 17, 0}; + + backend->call_with_validate(f, {result}, {b}); + EXPECT_EQ(read_vector(result), expected); +} + +NGRAPH_TEST(${BACKEND_NAME}, relu_2Dbackprop) +{ + auto shape_a = Shape{2, 5}; + auto A = make_shared(element::f32, shape_a); + auto delta_val = make_shared(element::f32, shape_a); + auto relu = make_shared(A, delta_val); + auto shape_rt = Shape{2, 5}; + auto f = make_shared(relu, op::ParameterVector{A, delta_val}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5}); + auto delta = backend->create_tensor(element::f32, shape_a); + copy_data(delta, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + auto result = backend->create_tensor(element::f32, shape_rt); + vector expected{1, 2, 0, 4, 0, 6, 7, 0, 9, 0}; + + backend->call_with_validate(f, {result}, {a, delta}); + EXPECT_EQ(read_vector(result), expected); +} + +NGRAPH_TEST(${BACKEND_NAME}, relu_4Dbackprop) +{ + auto shape_a = Shape{2, 2, 2, 2}; + auto A = make_shared(element::f32, shape_a); + auto delta_val = make_shared(element::f32, shape_a); + auto relu = make_shared(A, delta_val); + auto shape_rt = Shape{2, 2, 2, 2}; + auto f = make_shared(relu, op::ParameterVector{A, delta_val}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{1, 8, -8, 17, -0.5, 1, 8, -8, 
17, -0.5, 1, 8, -8, 17, -0.5, 1}); + auto delta = backend->create_tensor(element::f32, shape_a); + copy_data(delta, vector{1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5, 1}); + auto result = backend->create_tensor(element::f32, shape_rt); + vector expected{1, 8, 0, 17, 0, 1, 8, 0, 17, 0, 1, 8, 0, 17, 0, 1}; + + backend->call_with_validate(f, {result}, {a, delta}); + EXPECT_EQ(read_vector(result), expected); +} + +NGRAPH_TEST(${BACKEND_NAME}, softmax_all) +{ + Shape shape{2, 3}; + auto A = make_shared(element::f32, shape); + auto f = + make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{-3, -2, -1, 0, 1, 2}); + auto result = backend->create_tensor(element::f32, shape); + + auto d = expf(-3) + expf(-2) + expf(-1) + expf(0) + expf(1) + expf(2); + + backend->call_with_validate(f, {result}, {a}); + vector expected{ + expf(-3) / d, expf(-2) / d, expf(-1) / d, expf(0) / d, expf(1) / d, expf(2) / d}; + EXPECT_TRUE(test::all_close_f(expected, read_vector(result))); + + // empty AxisSet is the same as "full" AxisSet + f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); + backend = runtime::Backend::create("${BACKEND_NAME}"); + + backend->call_with_validate(f, {result}, {a}); + EXPECT_TRUE(test::all_close_f(expected, read_vector(result))); +} + +NGRAPH_TEST(${BACKEND_NAME}, softmax_axis_3d) +{ + Shape shape{2, 2, 3}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{-10, -20, -30, -40, -50, -60, -1, -2, -3, -4, -5, -6}); + auto result = backend->create_tensor(element::f32, shape); + + auto d0 = expf(-10) + expf(-1); + auto d1 = expf(-20) + expf(-2); + auto d2 = expf(-30) + 
expf(-3); + auto d3 = expf(-40) + expf(-4); + auto d4 = expf(-50) + expf(-5); + auto d5 = expf(-60) + expf(-6); + + backend->call_with_validate(f, {result}, {a}); + vector expected{expf(-10) / d0, + expf(-20) / d1, + expf(-30) / d2, + expf(-40) / d3, + expf(-50) / d4, + expf(-60) / d5, + expf(-1) / d0, + expf(-2) / d1, + expf(-3) / d2, + expf(-4) / d3, + expf(-5) / d4, + expf(-6) / d5}; + + EXPECT_TRUE(test::all_close(expected, read_vector(result))); +} + +NGRAPH_TEST(${BACKEND_NAME}, softmax_axis_3d_double) +{ + Shape shape{2, 2, 3}; + auto A = make_shared(element::f64, shape); + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f64, shape); + copy_data(a, vector{-10, -20, -30, -40, -50, -60, -1, -2, -3, -4, -5, -6}); + auto result = backend->create_tensor(element::f64, shape); + + auto d0 = expf(-10) + expf(-1); + auto d1 = expf(-20) + expf(-2); + auto d2 = expf(-30) + expf(-3); + auto d3 = expf(-40) + expf(-4); + auto d4 = expf(-50) + expf(-5); + auto d5 = expf(-60) + expf(-6); + + backend->call_with_validate(f, {result}, {a}); + vector expected{expf(-10) / d0, + expf(-20) / d1, + expf(-30) / d2, + expf(-40) / d3, + expf(-50) / d4, + expf(-60) / d5, + expf(-1) / d0, + expf(-2) / d1, + expf(-3) / d2, + expf(-4) / d3, + expf(-5) / d4, + expf(-6) / d5}; + + EXPECT_TRUE(test::all_close(expected, read_vector(result))); +} + +NGRAPH_TEST(${BACKEND_NAME}, softmax_axis) +{ + Shape shape{2, 3}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{-10, -20, -30, -40, -50, -60}); + auto result = backend->create_tensor(element::f32, shape); + + auto d0 = expf(-10) + expf(-20) + expf(-30); + auto d1 = expf(-40) + expf(-50) + expf(-60); 
+ + backend->call_with_validate(f, {result}, {a}); + vector expected{expf(-10) / d0, + expf(-20) / d0, + expf(-30) / d0, + expf(-40) / d1, + expf(-50) / d1, + expf(-60) / d1}; + EXPECT_TRUE(test::all_close_f(expected, read_vector(result))); +} + +NGRAPH_TEST(${BACKEND_NAME}, softmax_axis_2) +{ + Shape shape{2, 3}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{-10, -20, -30, -40, -50, -60}); + auto result = backend->create_tensor(element::f32, shape); + + auto d0 = expf(-10) + expf(-40); + auto d1 = expf(-20) + expf(-50); + auto d2 = expf(-30) + expf(-60); + + backend->call_with_validate(f, {result}, {a}); + vector expected{expf(-10) / d0, + expf(-20) / d1, + expf(-30) / d2, + expf(-40) / d0, + expf(-50) / d1, + expf(-60) / d2}; + EXPECT_TRUE(test::all_close(expected, read_vector(result))); +} + +NGRAPH_TEST(${BACKEND_NAME}, softmax_axis_3d_trivial) +{ + Shape shape{1, 2, 3}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, vector{-10, -20, -30, -40, -50, -60}); + auto result = backend->create_tensor(element::f32, shape); + + backend->call_with_validate(f, {result}, {a}); + vector expected{1, 1, 1, 1, 1, 1}; + EXPECT_TRUE(test::all_close(expected, read_vector(result))); +} + +NGRAPH_TEST(${BACKEND_NAME}, softmax_underflow) +{ + Shape shape{2, 3}; + auto A = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto low = std::numeric_limits::lowest(); + + auto a = backend->create_tensor(element::f32, shape); + copy_data(a, 
vector{low, 1, 2, 3, 4, 5}); + auto result = backend->create_tensor(element::f32, shape); + + auto d0 = expf(low) + expf(3); + auto d1 = expf(1) + expf(4); + auto d2 = expf(2) + expf(5); + + backend->call_with_validate(f, {result}, {a}); + vector expected{ + expf(low) / d0, expf(1) / d1, expf(2) / d2, expf(3) / d0, expf(4) / d1, expf(5) / d2}; + EXPECT_TRUE(test::all_close(expected, read_vector(result))); +} + +NGRAPH_TEST(${BACKEND_NAME}, multiple_backends) +{ + Shape shape{2, 2}; + auto A1 = make_shared(element::f32, shape); + auto B1 = make_shared(element::f32, shape); + auto f = make_shared(A1 + B1, op::ParameterVector{A1, B1}); + + auto A2 = make_shared(element::f32, shape); + auto B2 = make_shared(element::f32, shape); + auto g = make_shared(A2 * B2, op::ParameterVector{A2, B2}); + + auto backend1 = runtime::Backend::create("${BACKEND_NAME}"); + + auto backend2 = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + shared_ptr a1 = backend1->create_tensor(element::f32, shape); + shared_ptr b1 = backend1->create_tensor(element::f32, shape); + shared_ptr result1 = backend1->create_tensor(element::f32, shape); + + shared_ptr a2 = backend2->create_tensor(element::f32, shape); + shared_ptr b2 = backend2->create_tensor(element::f32, shape); + shared_ptr result2 = backend2->create_tensor(element::f32, shape); + + copy_data(a1, test::NDArray({{1, 2}, {3, 4}}).get_vector()); + copy_data(b1, test::NDArray({{5, 6}, {7, 8}}).get_vector()); + + copy_data(a2, test::NDArray({{1, 2}, {3, 4}}).get_vector()); + copy_data(b2, test::NDArray({{5, 6}, {7, 8}}).get_vector()); + + backend1->call_with_validate(f, {result1}, {a1, b1}); + EXPECT_EQ(read_vector(result1), + (test::NDArray({{6, 8}, {10, 12}})).get_vector()); + + backend2->call_with_validate(g, {result2}, {a2, b2}); + EXPECT_EQ(read_vector(result2), + (test::NDArray({{5, 12}, {21, 32}})).get_vector()); +} + +NGRAPH_TEST(${BACKEND_NAME}, tensorview_custom_mem) +{ + auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); + + Shape shape{2, 2}; + + auto make_external = [&]() { + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); + + return f; + }; + + auto f = make_external(); + + vector av{2, 4, 8, 16}; + vector bv{1, 2, 4, 8}; + // use custom mem with tensorview, no need to copy data + auto a = backend->create_tensor(element::f32, shape, av.data()); + auto b = backend->create_tensor(element::f32, shape, bv.data()); + + // use custom mem with result tensorview + vector rv{0, 0, 0, 0}; + auto result = backend->create_tensor(element::f32, shape, rv.data()); + + // result should be in memory without needing explict read + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((vector{2, 2, 2, 2}), rv); +} + +NGRAPH_TEST(${BACKEND_NAME}, validate_call_input_count) +{ + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + Shape shape{2, 2}; + + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); + + auto a = backend->create_tensor(element::f32, shape); + auto b = backend->create_tensor(element::f32, shape); + auto c = backend->create_tensor(element::f32, shape); + + EXPECT_ANY_THROW(backend->call_with_validate(f, {c}, {a})); +} + +NGRAPH_TEST(${BACKEND_NAME}, validate_call_input_type) +{ + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + Shape shape{2, 2}; + + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); + + auto a = backend->create_tensor(element::i32, shape); + auto b = backend->create_tensor(element::f32, shape); + auto c = backend->create_tensor(element::f32, shape); + + EXPECT_ANY_THROW(backend->call_with_validate(f, {c}, {a, b})); +} + +NGRAPH_TEST(${BACKEND_NAME}, 
validate_call_input_shape) +{ + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + Shape shape{2, 2}; + + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); + + auto a = backend->create_tensor(element::f32, {2, 3}); + auto b = backend->create_tensor(element::f32, shape); + auto c = backend->create_tensor(element::f32, shape); + + EXPECT_ANY_THROW(backend->call_with_validate(f, {c}, {a, b})); +} + +NGRAPH_TEST(${BACKEND_NAME}, validate_call_output_count) +{ + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + Shape shape{2, 2}; + + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); + + auto a = backend->create_tensor(element::f32, shape); + auto b = backend->create_tensor(element::f32, shape); + auto c = backend->create_tensor(element::f32, shape); + auto d = backend->create_tensor(element::f32, shape); + + EXPECT_ANY_THROW(backend->call_with_validate(f, {c, d}, {a, b})); +} + +NGRAPH_TEST(${BACKEND_NAME}, validate_call_output_type) +{ + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + Shape shape{2, 2}; + + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); + + auto a = backend->create_tensor(element::i32, shape); + auto b = backend->create_tensor(element::f32, shape); + auto c = backend->create_tensor(element::f32, shape); + + EXPECT_ANY_THROW(backend->call_with_validate(f, {a}, {b, c})); +} + +NGRAPH_TEST(${BACKEND_NAME}, validate_call_output_shape) +{ + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + Shape shape{2, 2}; + + auto A = make_shared(element::f32, shape); + auto B = make_shared(element::f32, shape); + auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); + + auto a = 
backend->create_tensor(element::f32, {2, 3}); + auto b = backend->create_tensor(element::f32, shape); + auto c = backend->create_tensor(element::f32, shape); + + EXPECT_ANY_THROW(backend->call_with_validate(f, {a}, {c, b})); +} + +NGRAPH_TEST(${BACKEND_NAME}, logical_and) +{ + Shape shape{2, 2, 2}; + auto A = make_shared(element::boolean, shape); + auto B = make_shared(element::boolean, shape); + auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::boolean, shape); + copy_data(a, vector{1, 0, 1, 1, 1, 0, 1, 0}); + auto b = backend->create_tensor(element::boolean, shape); + copy_data(b, vector{0, 0, 1, 0, 0, 1, 1, 0}); + auto result = backend->create_tensor(element::boolean, shape); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((vector{0, 0, 1, 0, 0, 0, 1, 0}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, logical_or) +{ + Shape shape{2, 2, 2}; + auto A = make_shared(element::boolean, shape); + auto B = make_shared(element::boolean, shape); + auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::boolean, shape); + copy_data(a, vector{1, 0, 1, 1, 1, 0, 1, 0}); + auto b = backend->create_tensor(element::boolean, shape); + copy_data(b, vector{0, 0, 1, 0, 0, 1, 1, 0}); + auto result = backend->create_tensor(element::boolean, shape); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ((vector{1, 0, 1, 1, 1, 1, 1, 0}), read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_b1c2h2w2) +{ + auto input_shape = Shape{1, 2, 2, 2}; + auto input = make_shared(element::f32, input_shape); + auto mean_shape = Shape{2}; + auto var_shape = Shape{2}; + auto gamma_shape = Shape{2}; + auto gamma = 
make_shared(element::f32, gamma_shape); + auto beta_shape = Shape{2}; + auto beta = make_shared(element::f32, beta_shape); + double eps = 0.001; + auto shape_r = Shape{1, 2, 2, 2}; + auto bn = make_shared(input, gamma, beta, eps); + + auto output_rt = std::make_shared(bn, 0); + auto mean_rt = std::make_shared(bn, 1); + auto variance_rt = std::make_shared(bn, 2); + + auto f = make_shared(NodeVector{output_rt, mean_rt, variance_rt}, + op::ParameterVector{input, gamma, beta}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + auto _input = backend->create_tensor(element::f32, Shape{1, 2, 2, 2}); + + copy_data(_input, + vector{0.54881352f, + 0.71518934f, + 0.60276335f, + 0.54488319f, + 0.42365479f, + 0.64589411f, + 0.4375872f, + 0.89177299f}); + auto _gamma = backend->create_tensor(element::f32, gamma_shape); + copy_data(_gamma, vector{1.0f, 1.0f}); + auto _beta = backend->create_tensor(element::f32, beta_shape); + copy_data(_beta, vector{0.0f, 0.0f}); + auto bn_output = backend->create_tensor(element::f32, shape_r); + auto result_mean = backend->create_tensor(element::f32, mean_shape); + auto result_variance = backend->create_tensor(element::f32, var_shape); + + vector expected_result{-0.71498716f, + 1.48388731f, + -0.00196938f, + -0.76693159f, + -0.91316032f, + 0.23943391f, + -0.84090298f, + 1.51462936f}; + vector expected_mean{0.602912f, 0.599727f}; + vector expected_variance{0.00472505f, 0.0361782f}; + + backend->call_with_validate( + f, {bn_output, result_mean, result_variance}, {_input, _gamma, _beta}); + + EXPECT_TRUE(test::all_close(expected_result, read_vector(bn_output), 1e-5f, 1e-6f)); + EXPECT_TRUE(test::all_close(expected_mean, read_vector(result_mean), 1e-5f, 1e-6f)); + EXPECT_TRUE( + test::all_close(expected_variance, read_vector(result_variance), 1e-5f, 1e-6f)); +} + +NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_b2c2h2w1) +{ + auto input_shape = Shape{2, 2, 2, 1}; + auto input = 
make_shared(element::f32, input_shape); + auto mean_shape = Shape{2}; + auto var_shape = Shape{2}; + auto gamma_shape = Shape{2}; + auto gamma = make_shared(element::f32, gamma_shape); + auto beta_shape = Shape{2}; + auto beta = make_shared(element::f32, beta_shape); + double eps = 0.001; + auto shape_r = Shape{2, 2, 2, 1}; + auto bn = make_shared(input, gamma, beta, eps); + + auto output_rt = std::make_shared(bn, 0); + auto mean_rt = std::make_shared(bn, 1); + auto variance_rt = std::make_shared(bn, 2); + + auto f = make_shared(NodeVector{output_rt, mean_rt, variance_rt}, + op::ParameterVector{input, gamma, beta}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + // Create some tensors for input/output + auto _input = backend->create_tensor(element::f32, input_shape); + copy_data(_input, + vector{0.54881352f, + 0.71518934f, + 0.60276335f, + 0.54488319f, + 0.42365479f, + 0.64589411f, + 0.4375872f, + 0.89177299f}); + + auto _gamma = backend->create_tensor(element::f32, gamma_shape); + copy_data(_gamma, vector{1.0f, 1.0f}); + auto _beta = backend->create_tensor(element::f32, beta_shape); + copy_data(_beta, vector{0.0f, 0.0f}); + auto bn_output = backend->create_tensor(element::f32, shape_r); + auto result_mean = backend->create_tensor(element::f32, mean_shape); + auto result_variance = backend->create_tensor(element::f32, var_shape); + + vector expected_result{ + -0.30327f, 1.1561f, -0.0963782f, -0.434702f, -1.4011f, 0.548275f, -1.06187f, 1.59295f}; + vector expected_mean{0.583388f, 0.619252f}; + vector expected_variance{0.0119972f, 0.0282681f}; + backend->call_with_validate( + f, {bn_output, result_mean, result_variance}, {_input, _gamma, _beta}); + + EXPECT_TRUE(test::all_close(expected_result, read_vector(bn_output))); + EXPECT_TRUE(test::all_close(expected_mean, read_vector(result_mean))); + EXPECT_TRUE( + test::all_close(expected_variance, read_vector(result_variance), 1e-5f, 1e-6f)); +} + +NGRAPH_TEST(${BACKEND_NAME}, batchnorm_bprop_n4c3h2w2) +{ 
+ auto input_shape = Shape{4, 3, 2, 2}; + auto shape_mean = Shape{3}; + auto input = make_shared(element::f32, input_shape); + auto mean_shape = Shape{3}; + auto mean = make_shared(element::f32, mean_shape); + auto var_shape = Shape{3}; + auto var = make_shared(element::f32, var_shape); + auto gamma_shape = Shape{3}; + auto gamma = make_shared(element::f32, gamma_shape); + auto beta_shape = Shape{3}; + auto beta = make_shared(element::f32, beta_shape); + double eps = 0.001; + auto shape_r = Shape{4, 3, 2, 2}; + auto bn = make_shared(input, gamma, beta, eps); + auto bn_dx = make_shared(bn, 0); + auto bn_dgamma = make_shared(bn, 1); + auto bn_dbeta = make_shared(bn, 2); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto _input = backend->create_tensor(element::f32, input_shape); + vector dataInput{ + 10.76331902f, 11.51178265f, 10.31018162f, 12.2993021f, 14.17626667f, 14.63498497f, + 13.63494492f, 13.84248161f, 11.34602547f, 13.22014618f, 10.46686649f, 10.39842987f, + 12.94806862f, 11.71670246f, 14.94438076f, 13.13236618f, 13.40889645f, 12.76128387f, + 11.34430027f, 11.86629677f, 11.11464024f, 10.93221283f, 11.95324039f, 10.96581173f, + 13.05455494f, 14.41404247f, 13.11169434f, 11.26559448f, 10.89965153f, 14.08202171f, + 11.12685776f, 12.58428574f, 12.59247875f, 13.00187492f, 12.66310215f, 10.06655025f, + 12.62048626f, 14.47942352f, 13.84950638f, 10.61425877f, 11.47936344f, 13.06011772f, + 13.63069057f, 12.31748772f, 13.84555244f, 10.95815468f, 12.78933334f, 12.75389099f}; + copy_data(_input, dataInput); + auto _mean = backend->create_tensor(element::f32, mean_shape); + copy_data(_mean, vector{12.56472874f, 12.80312157f, 11.81676865f}); + auto _var = backend->create_tensor(element::f32, var_shape); + copy_data(_var, vector{1.94557643f, 1.32772446f, 1.28163588f}); + + auto _gamma = backend->create_tensor(element::f32, gamma_shape); + copy_data(_gamma, vector{2.0f, 2.0f, 2.0f}); + auto _beta = backend->create_tensor(element::f32, beta_shape); + 
copy_data(_beta, vector{1.0f, 1.0f, 1.0f}); + auto result = backend->create_tensor(element::f32, shape_r); + + shared_ptr _delta = backend->create_tensor(element::f32, shape_r); + vector deltaData(shape_size(shape_r), 20.0f); + copy_data(_delta, deltaData); + + auto f = make_shared(NodeVector{bn_dx, bn_dgamma, bn_dbeta}, + op::ParameterVector{mean, var, input, gamma, beta}); + + auto C = std::make_shared(element::f32, shape_r); + + auto zero = ngraph::make_zero(bn_dgamma->get_element_type(), bn_dgamma->get_shape()); + ngraph::autodiff::Adjoints adjoints(NodeVector{bn_dx, bn_dgamma, bn_dbeta}, + NodeVector{C, zero, zero}); + + auto dinput = adjoints.backprop_node(input); + auto dgamma = adjoints.backprop_node(gamma); + auto dbeta = adjoints.backprop_node(beta); + + auto df = make_shared(NodeVector{dinput, dgamma, dbeta}, + op::ParameterVector{mean, var, input, gamma, beta, C}); + + // roundtrip serialization + string js = serialize(df, 4); + istringstream in(js); + df = deserialize(in); + + shared_ptr _dinput = backend->create_tensor(element::f32, shape_r); + shared_ptr _dgamma = backend->create_tensor(element::f32, gamma_shape); + shared_ptr _dbeta = backend->create_tensor(element::f32, beta_shape); + + backend->call_with_validate( + df, {_dinput, _dgamma, _dbeta}, {_mean, _var, _input, _gamma, _beta, _delta}); + + vector expected_input{ + 8.17051607e-06f, 4.77576657e-06f, 1.02257760e-05f, 1.20387525e-06f, -1.73868522e-06f, + 3.84632768e-06f, -1.07932050e-05f, -2.57458956e-06f, -2.22166714e-06f, -8.38779043e-06f, + -2.48082982e-06f, 5.89238360e-06f, -2.52895109e-07f, -8.68433445e-06f, -5.82726737e-06f, + 8.84659658e-06f, 3.03944108e-05f, 4.05480879e-05f, 1.84123158e-05f, 2.30061178e-05f, + 1.34087590e-05f, -9.26072571e-07f, -3.22908454e-05f, -2.07365116e-05f, -4.21330941e-05f, + 2.83083100e-05f, -3.71039101e-05f, -4.84390640e-06f, -2.93012376e-05f, 5.68858087e-06f, + 1.83181458e-05f, -1.07494506e-05f, -2.32429103e-06f, 6.92914809e-06f, -6.66512321e-06f, + 
-7.00302840e-06f, -3.46675184e-06f, -4.36748381e-06f, 6.73822226e-07f, -4.20158993e-06f, + 3.83005061e-06f, 5.85143729e-06f, 4.17875243e-06f, -8.64167783e-06f, 1.00170803e-05f, + -4.23939666e-06f, 4.80201680e-06f, 4.62702078e-06f}; + + ASSERT_TRUE(ngraph::test::all_close(read_vector(_dinput), expected_input, 1e-3f, 1e-4f)); + vector expected_dgamma{7.06315041e-05f, -2.35289335e-04f, -5.06639481e-05f}; + ASSERT_TRUE( + ngraph::test::all_close(read_vector(_dgamma), expected_dgamma, 1e-2f, 1e-3f)); + vector expected_dbeta{320.f, 320.f, 320.f}; + ASSERT_TRUE(ngraph::test::all_close(read_vector(_dbeta), expected_dbeta, 1e-4f, 1e-8f)); +} + +NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_inference_b2c2h2w1) +{ + auto input_shape = Shape{2, 2, 2, 1}; + auto input = make_shared(element::f32, input_shape); + auto mean_shape = Shape{2}; + auto mean = make_shared(element::f32, mean_shape); + auto var_shape = Shape{2}; + auto var = make_shared(element::f32, var_shape); + auto gamma_shape = Shape{2}; + auto gamma = make_shared(element::f32, gamma_shape); + auto beta_shape = Shape{2}; + auto beta = make_shared(element::f32, beta_shape); + double eps = 0.001; + auto shape_r = Shape{2, 2, 2, 1}; + auto bn = make_shared(input, gamma, beta, mean, var, eps); + + auto f = make_shared(bn, op::ParameterVector{input, gamma, beta, mean, var}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + // Create some tensors for input/output + auto _input = backend->create_tensor(element::f32, input_shape); + copy_data(_input, + vector{0.54881352f, + 0.71518934f, + 0.60276335f, + 0.54488319f, + 0.42365479f, + 0.64589411f, + 0.4375872f, + 0.89177299f}); + + auto _gamma = backend->create_tensor(element::f32, gamma_shape); + copy_data(_gamma, vector{1.0f, 1.0f}); + auto _beta = backend->create_tensor(element::f32, beta_shape); + copy_data(_beta, vector{0.0f, 0.0f}); + auto _mean = backend->create_tensor(element::f32, mean_shape); + copy_data(_mean, vector{0.583388f, 0.619252f}); + auto 
_var = backend->create_tensor(element::f32, var_shape); + copy_data(_var, vector{0.0119972f, 0.0282681f}); + auto bn_output = backend->create_tensor(element::f32, shape_r); + + vector expected_result{ + -0.30327f, 1.1561f, -0.0963782f, -0.434702f, -1.4011f, 0.548275f, -1.06187f, 1.59295f}; + backend->call_with_validate(f, {bn_output}, {_input, _gamma, _beta, _mean, _var}); + + ASSERT_TRUE( + ngraph::test::all_close(expected_result, read_vector(bn_output), 1e-3f, 1e-4f)); +} + +#if 0 +NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_globalstats_b2c2w2h1) +{ + auto input_shape = Shape{2, 2, 2, 1}; + auto input = make_shared(element::f32, input_shape); + auto mean_shape = Shape{2}; + auto mean = make_shared(element::f32, mean_shape); + auto var_shape = Shape{2}; + auto var = make_shared(element::f32, var_shape); + auto gamma_shape = Shape{2}; + auto gamma = make_shared(element::f32, gamma_shape); + auto beta_shape = Shape{2}; + auto beta = make_shared(element::f32, beta_shape); + double eps = 0.001; + auto shape_r = Shape{2, 2, 2, 1}; + auto bn = make_shared(input, gamma, beta, mean, var, eps); + + auto f = make_shared(bn, op::ParameterVector{gamma, beta, input, mean, var}); + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + // Create some tensors for input/output + auto _input = backend->create_tensor(element::f32, input_shape); + copy_data(_input, + vector{0.54881352f, + 0.71518934f, + 0.60276335f, + 0.54488319f, + 0.42365479f, + 0.64589411f, + 0.4375872f, + 0.89177299f}); + + auto _gamma = backend->create_tensor(element::f32, gamma_shape); + copy_data(_gamma, vector{1.0f, 1.0f}); + auto _beta = backend->create_tensor(element::f32, beta_shape); + copy_data(_beta, vector{0.0f, 0.0f}); + auto _mean = backend->create_tensor(element::f32, mean_shape); + copy_data(_mean, vector{0.583388f, 0.619252f}); + auto _var = backend->create_tensor(element::f32, var_shape); + copy_data(_var, vector{0.0119972f, 0.0282681f}); + auto bn_output = 
backend->create_tensor(element::f32, shape_r); + + vector expected_result{ + -0.30327f, 1.1561f, -0.0963782f, -0.434702f, -1.4011f, 0.548275f, -1.06187f, 1.59295f}; + backend->call_with_validate(f, {bn_output}, {_gamma, _beta, _input, _mean, _var}); + + ASSERT_TRUE( + ngraph::test::all_close(expected_result, read_vector(bn_output), 1e-3f, 1e-4f)); +} +#endif + +NGRAPH_TEST(${BACKEND_NAME}, reverse_sequence_n2c3h4w2) +{ + Shape shape{2, 3, 4, 2}; + Shape seq_len_shape{4}; + auto A = make_shared(element::i32, shape); + auto B = make_shared(element::i32, seq_len_shape); + + size_t batch_axis = 2; + size_t sequence_axis = 1; + auto rs = std::make_shared(A, B, batch_axis, sequence_axis); + + auto f = make_shared(rs, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + shared_ptr a = backend->create_tensor(element::i32, shape); + shared_ptr b = backend->create_tensor(element::i32, seq_len_shape); + + shared_ptr result = backend->create_tensor(element::i32, shape); + + std::vector input{ + 0, 0, 3, 0, 6, 0, 9, 0, 1, 0, 4, 0, 7, 0, 10, 0, 2, 0, 5, 0, 8, 0, 11, 0, + 12, 0, 15, 0, 18, 0, 21, 0, 13, 0, 16, 0, 19, 0, 22, 0, 14, 0, 17, 0, 20, 0, 23, 0, + }; + + std::vector seq_lenghts{1, 2, 1, 2}; + copy_data(b, seq_lenghts); + + std::vector expected{ + 0, 0, 4, 0, 6, 0, 10, 0, 1, 0, 3, 0, 7, 0, 9, 0, 2, 0, 5, 0, 8, 0, 11, 0, + + 12, 0, 16, 0, 18, 0, 22, 0, 13, 0, 15, 0, 19, 0, 21, 0, 14, 0, 17, 0, 20, 0, 23, 0}; + + copy_data(a, input); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ(read_vector(result), expected); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_sequence_n4c3h2w2) +{ + Shape shape{4, 3, 2, 2}; + auto A = make_shared(element::i32, shape); + Shape seq_len_shape{4}; + auto B = make_shared(element::i32, seq_len_shape); + + size_t batch_axis = 0; + size_t sequence_axis = 1; + + auto rs = std::make_shared(A, B, batch_axis, sequence_axis); + + auto f = make_shared(rs, 
op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + shared_ptr a = backend->create_tensor(element::i32, shape); + shared_ptr b = backend->create_tensor(element::i32, seq_len_shape); + + shared_ptr result = backend->create_tensor(element::i32, shape); + + std::vector seq_lenghts{1, 2, 3, 3}; + copy_data(b, seq_lenghts); + + std::vector input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47}; + + std::vector expected{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, + 12, 13, 14, 15, 20, 21, 22, 23, 32, 33, 34, 35, 28, 29, 30, 31, + 24, 25, 26, 27, 44, 45, 46, 47, 40, 41, 42, 43, 36, 37, 38, 39}; + + copy_data(a, input); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ(read_vector(result), expected); +} + +NGRAPH_TEST(${BACKEND_NAME}, reverse_sequence_n4d2c3h2w2) +{ + Shape shape{4, 2, 3, 2, 2}; + auto A = make_shared(element::i32, shape); + Shape seq_len_shape{4}; + auto B = make_shared(element::i32, seq_len_shape); + + size_t batch_axis = 0; + size_t sequence_axis = 2; + + auto rs = std::make_shared(A, B, batch_axis, sequence_axis); + + auto f = make_shared(rs, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create some tensors for input/output + shared_ptr a = backend->create_tensor(element::i32, shape); + shared_ptr b = backend->create_tensor(element::i32, seq_len_shape); + + shared_ptr result = backend->create_tensor(element::i32, shape); + + std::vector input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; + + std::vector expected{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 28, 29, 30, 31, 24, 25, 26, 27, + 32, 33, 34, 35, 40, 41, 42, 43, 36, 37, 38, 39, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, 76, 77, 78, 79, 72, 73, 74, 75, + 80, 81, 82, 83, 88, 89, 90, 91, 84, 85, 86, 87, 92, 93, 94, 95}; + + copy_data(a, input); + + std::vector seq_lenghts{1, 2, 1, 2}; + copy_data(b, seq_lenghts); + + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ(read_vector(result), expected); +} + +NGRAPH_TEST(${BACKEND_NAME}, generate_mask) +{ + Shape scalar{}; + Shape result_shape{1, 128}; + const unsigned int seed = 777; + auto training = op::Constant::create(element::f32, Shape{}, {1}); + auto gen_mask = make_shared(training, result_shape, element::f32, seed, 0.5); + auto gen_mask2 = make_shared(training, result_shape, element::f32, seed, 0.5); + auto f = make_shared(NodeVector{gen_mask, gen_mask2}, op::ParameterVector{}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto is_not_zero_or_one = [](float num) { return num != 0.f && num != 1.f; }; + + auto result_tv1 = backend->create_tensor(result_shape); + auto result_tv2 = backend->create_tensor(result_shape); + backend->call_with_validate(f, {result_tv1, result_tv2}, {}); + auto result1 = read_vector(result_tv1); + auto result2 = read_vector(result_tv2); + ASSERT_EQ(result1, result2); + ASSERT_FALSE(std::any_of(result1.begin(), result1.end(), is_not_zero_or_one)); + backend->call_with_validate(f, {result_tv1, result_tv2}, {}); + auto result1_2 = read_vector(result_tv1); + auto result2_2 = read_vector(result_tv2); + ASSERT_NE(result1, result1_2); + ASSERT_FALSE(std::any_of(result1_2.begin(), result1_2.end(), is_not_zero_or_one)); + ASSERT_NE(result2, result2_2); + ASSERT_FALSE(std::any_of(result2_2.begin(), 
result2_2.end(), is_not_zero_or_one)); +} + +NGRAPH_TEST(${BACKEND_NAME}, quantize) +{ + Shape input_shape{4, 3}; + Shape scale_offset_shape; + AxisSet quantization_axes; + + auto input_type = element::f32; + auto output_type = element::u8; + + typedef float input_c_type; + typedef uint8_t output_c_type; + + op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY; + + auto X = make_shared(input_type, input_shape); + auto scale = op::Constant::create(input_type, scale_offset_shape, {2}); + auto offset = op::Constant::create(output_type, scale_offset_shape, {1}); + auto quantize = + make_shared(X, scale, offset, output_type, quantization_axes, round_mode); + auto f = make_shared(quantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = backend->create_tensor(input_type, input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + // divide by scale 2 2 2 2 2 2 2 2 2 2 2 2 + // equals (rounded) 0 1 1 2 2 3 3 4 4 5 5 6 + // plus offset 1 1 1 1 1 1 1 1 1 1 1 1 + // equals 1 2 2 3 3 4 4 5 5 6 6 7 + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ((vector{1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, dequantize) +{ + Shape input_shape{4, 3}; + Shape scale_offset_shape; + AxisSet quantization_axes; + + auto input_type = element::u8; + auto output_type = element::f32; + + typedef uint8_t input_c_type; + typedef float output_c_type; + + auto X = make_shared(input_type, input_shape); + auto scale = op::Constant::create(output_type, scale_offset_shape, {2}); + auto offset = op::Constant::create(input_type, scale_offset_shape, {1}); + auto dequantize = make_shared(X, scale, offset, output_type, quantization_axes); + auto f = make_shared(dequantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = 
backend->create_tensor(input_type, input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7}); + // minus offset 1 1 1 1 1 1 1 1 1 1 1 1 + // eqauls 0 1 1 2 2 3 3 4 4 5 5 6 + // multiplied by scale 2 2 2 2 2 2 2 2 2 2 2 2 + // equals 0 2 2 4 4 6 6 8 8 10 10 12 + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ((vector{0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, dequantize_zero_offset) +{ + Shape input_shape{4, 3}; + Shape scale_offset_shape; + AxisSet quantization_axes; + + auto input_type = element::u8; + auto output_type = element::f32; + + typedef uint8_t input_c_type; + typedef float output_c_type; + + auto X = make_shared(input_type, input_shape); + auto scale = op::Constant::create(output_type, scale_offset_shape, {2}); + auto offset = op::Constant::create(input_type, scale_offset_shape, {0}); + auto dequantize = make_shared(X, scale, offset, output_type, quantization_axes); + auto f = make_shared(dequantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = backend->create_tensor(input_type, input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7}); + // minus offset 0 0 0 0 0 0 0 0 0 0 0 0 + // multiplied by scale 2 2 2 2 2 2 2 2 2 2 2 2 + // equals 2 4 4 6 6 8 8 10 10 12 12 14 + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ((vector{2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, quantize_axes) +{ + Shape input_shape{4, 3}; + Shape scale_offset_shape{4}; + AxisSet quantization_axes{0}; + + auto input_type = element::f32; + auto output_type = element::u8; + + typedef float input_c_type; + typedef uint8_t output_c_type; + + op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY; + + auto X = 
make_shared(input_type, input_shape); + auto scale = op::Constant::create(input_type, scale_offset_shape, {2, 3, 4, 5}); + auto offset = op::Constant::create(output_type, scale_offset_shape, {10, 20, 30, 40}); + auto quantize = + make_shared(X, scale, offset, output_type, quantization_axes, round_mode); + auto f = make_shared(quantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = backend->create_tensor(input_type, input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + // divided by scale 2 2 2 3 3 3 4 4 4 5 5 5 + // equals (rounded) 0 1 1 1 1 2 2 2 2 2 2 2 + // plus offset 10 10 10 20 20 20 30 30 30 40 40 40 + // equals 10 11 11 21 21 22 32 32 32 42 42 42 + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ((vector{10, 11, 11, 21, 21, 22, 32, 32, 32, 42, 42, 42}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, dequantize_axes) +{ + Shape input_shape{4, 3}; + Shape scale_offset_shape{4}; + AxisSet quantization_axes{0}; + + auto input_type = element::u8; + auto output_type = element::f32; + + typedef uint8_t input_c_type; + typedef float output_c_type; + + auto X = make_shared(input_type, input_shape); + auto scale = op::Constant::create(output_type, scale_offset_shape, {2, 3, 4, 5}); + auto offset = op::Constant::create(input_type, scale_offset_shape, {10, 20, 30, 40}); + auto dequantize = make_shared(X, scale, offset, output_type, quantization_axes); + auto f = make_shared(dequantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = backend->create_tensor(input_type, input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{10, 11, 11, 21, 21, 22, 32, 32, 32, 42, 42, 42}); + // minus offset 10 10 10 20 20 20 30 30 30 40 40 40 + // equals 0 1 1 1 1 2 2 2 2 2 2 2 + // multiplied by scale 2 2 2 3 3 3 4 4 4 5 5 5 + // equals 0 2 
2 3 3 6 8 8 8 10 10 10 + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ((vector{0, 2, 2, 3, 3, 6, 8, 8, 8, 10, 10, 10}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, quantize_int8) +{ + Shape input_shape{4, 3}; + Shape scale_offset_shape; + AxisSet quantization_axes; + + auto input_type = element::f32; + auto output_type = element::i8; + + typedef float input_c_type; + typedef int8_t output_c_type; + + op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY; + + auto X = make_shared(input_type, input_shape); + auto scale = op::Constant::create(input_type, scale_offset_shape, {2}); + auto offset = op::Constant::create(output_type, scale_offset_shape, {1}); + auto quantize = + make_shared(X, scale, offset, output_type, quantization_axes, round_mode); + auto f = make_shared(quantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = backend->create_tensor(input_type, input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11}); + // divide by scale 2 2 2 2 2 2 2 2 2 2 2 2 + // equals (rounded) 0 -1 1 -2 2 -3 3 -4 4 -5 5 -6 + // plus offset 1 1 1 1 1 1 1 1 1 1 1 1 + // equals 1 0 2 -1 3 -2 4 -3 5 -4 6 -5 + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ((vector{1, 0, 2, -1, 3, -2, 4, -3, 5, -4, 6, -5}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, dequantize_int8) +{ + Shape input_shape{4, 3}; + Shape scale_offset_shape; + AxisSet quantization_axes; + + auto input_type = element::i8; + auto output_type = element::f32; + + typedef int8_t input_c_type; + typedef float output_c_type; + + auto X = make_shared(input_type, input_shape); + auto scale = op::Constant::create(output_type, scale_offset_shape, {2}); + auto offset = op::Constant::create(input_type, scale_offset_shape, {1}); + auto dequantize = make_shared(X, scale, offset, output_type, quantization_axes); + auto f 
= make_shared(dequantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = backend->create_tensor(input_type, input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{1, 0, 2, -1, 3, -2, 4, -3, 5, -4, 6, -5}); + // minus offset 1 1 1 1 1 1 1 1 1 1 1 1 + // equals 0 -1 1 -2 2 -3 3 -4 4 -5 5 -6 + // multiplied by scale 2 2 2 2 2 2 2 2 2 2 2 2 + // equals 0 -2 2 -4 4 -6 6 -8 8 -10 10 -12 + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ((vector{0, -2, 2, -4, 4, -6, 6, -8, 8, -10, 10, -12}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, quantize_clamp) +{ + Shape input_shape{4, 3}; + Shape scale_offset_shape; + AxisSet quantization_axes; + + auto input_type = element::f32; + auto output_type = element::i8; + + typedef float input_c_type; + typedef int8_t output_c_type; + + op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY; + + auto X = make_shared(input_type, input_shape); + auto scale = op::Constant::create(input_type, scale_offset_shape, {0.00001}); + auto offset = op::Constant::create(output_type, scale_offset_shape, {1}); + auto quantize = + make_shared(X, scale, offset, output_type, quantization_axes, round_mode); + auto f = make_shared(quantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = backend->create_tensor(input_type, input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11}); + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ( + (vector{1, -128, 127, -128, 127, -128, 127, -128, 127, -128, 127, -128}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_NEAREST_TOWARD_ZERO) +{ + Shape input_shape{4, 3}; + Shape scale_offset_shape; + AxisSet quantization_axes; + + auto input_type = element::f32; + auto output_type = element::i8; + + typedef float 
input_c_type; + typedef int8_t output_c_type; + + op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_ZERO; + + auto X = make_shared(input_type, input_shape); + auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); + auto offset = op::Constant::create(output_type, scale_offset_shape, {0}); + auto quantize = + make_shared(X, scale, offset, output_type, quantization_axes, round_mode); + auto f = make_shared(quantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = backend->create_tensor(input_type, input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); + // divide by scale 4 4 4 4 4 4 4 4 4 4 4 4 + // equals (rounded) 2 2 3 -2 -2 -3 3 3 4 -3 -3 -4 + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ((vector{2, 2, 3, -2, -2, -3, 3, 3, 4, -3, -3, -4}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_NEAREST_UPWARD) +{ + Shape input_shape{4, 3}; + Shape scale_offset_shape; + AxisSet quantization_axes; + + auto input_type = element::f32; + auto output_type = element::i8; + + typedef float input_c_type; + typedef int8_t output_c_type; + + op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_UPWARD; + + auto X = make_shared(input_type, input_shape); + auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); + auto offset = op::Constant::create(output_type, scale_offset_shape, {0}); + auto quantize = + make_shared(X, scale, offset, output_type, quantization_axes, round_mode); + auto f = make_shared(quantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = backend->create_tensor(input_type, input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); + // divide by scale 4 4 
4 4 4 4 4 4 4 4 4 4 + // equals (rounded) 2 3 3 -2 -2 -3 3 4 4 -3 -3 -4 + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ((vector{2, 3, 3, -2, -2, -3, 3, 4, 4, -3, -3, -4}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_NEAREST_DOWNWARD) +{ + Shape input_shape{4, 3}; + Shape scale_offset_shape; + AxisSet quantization_axes; + + auto input_type = element::f32; + auto output_type = element::i8; + + typedef float input_c_type; + typedef int8_t output_c_type; + + op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_DOWNWARD; + + auto X = make_shared(input_type, input_shape); + auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); + auto offset = op::Constant::create(output_type, scale_offset_shape, {0}); + auto quantize = + make_shared(X, scale, offset, output_type, quantization_axes, round_mode); + auto f = make_shared(quantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = backend->create_tensor(input_type, input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); + // divide by scale 4 4 4 4 4 4 4 4 4 4 4 4 + // equals (rounded) 2 2 3 -2 -3 -3 3 3 4 -3 -4 -4 + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ((vector{2, 2, 3, -2, -3, -3, 3, 3, 4, -3, -4, -4}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_NEAREST_TOWARD_EVEN) +{ + Shape input_shape{4, 3}; + Shape scale_offset_shape; + AxisSet quantization_axes; + + auto input_type = element::f32; + auto output_type = element::i8; + + typedef float input_c_type; + typedef int8_t output_c_type; + + op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN; + + auto X = make_shared(input_type, input_shape); + auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); + auto offset = op::Constant::create(output_type, 
scale_offset_shape, {0}); + auto quantize = + make_shared(X, scale, offset, output_type, quantization_axes, round_mode); + auto f = make_shared(quantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = backend->create_tensor(input_type, input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); + // divide by scale 4 4 4 4 4 4 4 4 4 4 4 4 + // equals (rounded) 2 2 3 -2 -2 -3 3 4 4 -3 -4 -4 + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ((vector{2, 2, 3, -2, -2, -3, 3, 4, 4, -3, -4, -4}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_TOWARD_INFINITY) +{ + Shape input_shape{4, 3}; + Shape scale_offset_shape; + AxisSet quantization_axes; + + auto input_type = element::f32; + auto output_type = element::i8; + + typedef float input_c_type; + typedef int8_t output_c_type; + + op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_TOWARD_INFINITY; + + auto X = make_shared(input_type, input_shape); + auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); + auto offset = op::Constant::create(output_type, scale_offset_shape, {0}); + auto quantize = make_shared( + X, + scale, + offset, + output_type, + quantization_axes, + static_cast(static_cast(round_mode))); + auto f = make_shared(quantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = backend->create_tensor(input_type, input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); + // divide by scale 4 4 4 4 4 4 4 4 4 4 4 4 + // equals (rounded) 3 3 3 -3 -3 -3 4 4 4 -4 -4 -4 + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ((vector{3, 3, 3, -3, -3, -3, 4, 4, 4, -4, -4, -4}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_TOWARD_ZERO) +{ + Shape 
input_shape{4, 3}; + Shape scale_offset_shape; + AxisSet quantization_axes; + + auto input_type = element::f32; + auto output_type = element::i8; + + typedef float input_c_type; + typedef int8_t output_c_type; + + op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_TOWARD_ZERO; + + auto X = make_shared(input_type, input_shape); + auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); + auto offset = op::Constant::create(output_type, scale_offset_shape, {0}); + auto quantize = make_shared( + X, + scale, + offset, + output_type, + quantization_axes, + static_cast(static_cast(round_mode))); + auto f = make_shared(quantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = backend->create_tensor(input_type, input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); + // divide by scale 4 4 4 4 4 4 4 4 4 4 4 4 + // equals (rounded) 2 2 2 -2 -2 -2 3 3 3 -3 -3 -3 + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ((vector{2, 2, 2, -2, -2, -2, 3, 3, 3, -3, -3, -3}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_UP) +{ + Shape input_shape{4, 3}; + Shape scale_offset_shape; + AxisSet quantization_axes; + + auto input_type = element::f32; + auto output_type = element::i8; + + typedef float input_c_type; + typedef int8_t output_c_type; + + op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_UP; + + auto X = make_shared(input_type, input_shape); + auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); + auto offset = op::Constant::create(output_type, scale_offset_shape, {0}); + auto quantize = + make_shared(X, scale, offset, output_type, quantization_axes, round_mode); + auto f = make_shared(quantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = backend->create_tensor(input_type, 
input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); + // divide by scale 4 4 4 4 4 4 4 4 4 4 4 4 + // equals (rounded) 3 3 3 -2 -2 -2 4 4 4 -3 -3 -3 + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ((vector{3, 3, 3, -2, -2, -2, 4, 4, 4, -3, -3, -3}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_DOWN) +{ + Shape input_shape{4, 3}; + Shape scale_offset_shape; + AxisSet quantization_axes; + + auto input_type = element::f32; + auto output_type = element::i8; + + typedef float input_c_type; + typedef int8_t output_c_type; + + op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_DOWN; + + auto X = make_shared(input_type, input_shape); + auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); + auto offset = op::Constant::create(output_type, scale_offset_shape, {0}); + auto quantize = + make_shared(X, scale, offset, output_type, quantization_axes, round_mode); + auto f = make_shared(quantize, op::ParameterVector{X}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + auto x = backend->create_tensor(input_type, input_shape); + auto y = backend->create_tensor(output_type, input_shape); + + copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); + // divide by scale 4 4 4 4 4 4 4 4 4 4 4 4 + // equals (rounded) 2 2 2 -3 -3 -3 3 3 3 -4 -4 -4 + + backend->call_with_validate(f, {y}, {x}); + EXPECT_EQ((vector{2, 2, 2, -3, -3, -3, 3, 3, 3, -4, -4, -4}), + read_vector(y)); +} + +NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_bprop) +{ + Shape sca{1}; + Shape vec{1, 1, 1, 2}; + double eps = 1.0e-04; + + auto g = std::make_shared(element::f32, sca); + auto b = std::make_shared(element::f32, sca); + auto input = std::make_shared(element::f32, vec); + auto bn_fp = std::make_shared(input, g, b, eps); + auto bnorm = std::make_shared(bn_fp, 0); + auto mean = std::make_shared(bn_fp, 1); + auto var 
= std::make_shared(bn_fp, 2); + + auto delta = std::make_shared(element::f32, vec); + auto bn_bp = + std::make_shared(bnorm, g, b, mean, var, delta, eps); + auto dx = std::make_shared(bn_bp, 0); + + std::vector> args = { + {1.0f}, // gamma + {1.0f}, // beta + {1.1f, 1.0f}, // x + {1.0f, 1.0f}, // dy + }; + + auto func = std::make_shared(dx, op::ParameterVector{g, b, input, delta}); + auto results = execute(func, args, "${BACKEND_NAME}"); + EXPECT_TRUE(test::all_close_f(std::vector{350.957, -388.67}, results.at(0))); +} + +NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_bprop_2step) +{ + Shape sca{1}; + Shape vec{1, 1, 1, 2}; + double eps = 1.0e-04; + + auto g = std::make_shared(element::f32, sca); + auto b = std::make_shared(element::f32, sca); + auto input = std::make_shared(element::f32, vec); + auto bn_fp = std::make_shared(input, g, b, eps); + auto bnorm = std::make_shared(bn_fp, 0); + auto mean = std::make_shared(bn_fp, 1); + auto var = std::make_shared(bn_fp, 2); + + auto func_bn = + std::make_shared(NodeVector{bnorm, mean, var}, op::ParameterVector{g, b, input}); + + std::vector> args = { + {1.0f}, // gamma + {1.0f}, // beta + {1.1f, 1.0f}, // x + }; + auto results = execute(func_bn, args, "${BACKEND_NAME}"); + + g = std::make_shared(element::f32, sca); + b = std::make_shared(element::f32, sca); + auto bn_output = std::make_shared(element::f32, vec); + auto m = std::make_shared(element::f32, sca); + auto v = std::make_shared(element::f32, sca); + auto delta = std::make_shared(element::f32, vec); + auto bn_bp = std::make_shared(bn_output, g, b, m, v, delta, eps); + auto dx = std::make_shared(bn_bp, 0); + + args.pop_back(); // remove x + args.push_back(results.at(0)); // bn_output + args.push_back(results.at(1)); // m + args.push_back(results.at(2)); // v + args.push_back({1.0f, 1.0f}); // dy + + auto func = std::make_shared(dx, op::ParameterVector{g, b, bn_output, m, v, delta}); + results = execute(func, args, "${BACKEND_NAME}"); + 
EXPECT_TRUE(test::all_close_f(std::vector{350.957, -388.67}, results.at(0))); +} + +NGRAPH_TEST(${BACKEND_NAME}, shape_of_scalar) +{ + Shape input_shape{}; + Shape output_shape{0}; + + auto A = std::make_shared(element::f32, input_shape); + auto f = std::make_shared(std::make_shared(A), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, input_shape); + copy_data(a, vector{0}); + auto result = backend->create_tensor(element::u64, output_shape); + + backend->call_with_validate(f, {result}, {a}); + vector expected{}; + EXPECT_EQ(expected, read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, shape_of_vector) +{ + Shape input_shape{2}; + Shape output_shape{1}; + + auto A = std::make_shared(element::f32, input_shape); + auto f = std::make_shared(std::make_shared(A), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, input_shape); + copy_data(a, vector(2, 0)); + auto result = backend->create_tensor(element::u64, output_shape); + + backend->call_with_validate(f, {result}, {a}); + vector expected{2}; + EXPECT_EQ(expected, read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, shape_of_matrix) +{ + Shape input_shape{2, 4}; + Shape output_shape{2}; + + auto A = std::make_shared(element::f32, input_shape); + auto f = std::make_shared(std::make_shared(A), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, input_shape); + copy_data(a, vector(2 * 4, 0)); + auto result = backend->create_tensor(element::u64, output_shape); + + backend->call_with_validate(f, {result}, {a}); + vector expected{2, 4}; + EXPECT_EQ(expected, read_vector(result)); +} + +NGRAPH_TEST(${BACKEND_NAME}, shape_of_5d) +{ + Shape input_shape{2, 4, 8, 16, 32}; + Shape output_shape{5}; + + auto A = std::make_shared(element::f32, input_shape); + auto 
f = std::make_shared(std::make_shared(A), op::ParameterVector{A}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + auto a = backend->create_tensor(element::f32, input_shape); + copy_data(a, vector(2 * 4 * 8 * 16 * 32, 0)); + auto result = backend->create_tensor(element::u64, output_shape); + + backend->call_with_validate(f, {result}, {a}); + vector expected{2, 4, 8, 16, 32}; + EXPECT_EQ(expected, read_vector(result)); +} diff --git a/test/cpu_fusion.cpp b/test/cpu_fusion.cpp index 481814c8d7e..5c7957dde7e 100644 --- a/test/cpu_fusion.cpp +++ b/test/cpu_fusion.cpp @@ -718,7 +718,7 @@ TEST(cpu_fusion, batchnorm_fprop_relu_b1c2h2w2) auto beta = make_shared(element::f32, beta_shape); double eps = 0.001; auto shape_r = Shape{1, 2, 2, 2}; - auto bn = make_shared(eps, gamma, beta, input); + auto bn = make_shared(input, gamma, beta, eps); auto output_rt = std::make_shared(bn, 0); // Note, op::Splice is used to break Relu(BatchNorm) fusion @@ -1082,8 +1082,8 @@ shared_ptr gen_groupconv_batchnorm(const bool add_goe, // Adding a goe will stop fusion since the patterns wont expect to see this op auto bn = - add_goe ? std::make_shared(eps, gamma, beta, goe_bn, mean, var) - : std::make_shared(eps, gamma, beta, group_conv, mean, var); + add_goe ? 
std::make_shared(goe_bn, gamma, beta, mean, var, eps) + : std::make_shared(group_conv, gamma, beta, mean, var, eps); if (with_relu) { auto prelu = std::make_shared(bn); @@ -1767,7 +1767,7 @@ TEST(cpu_fusion, conv_batch_norm_folding) auto mean = std::make_shared(element::f32, shape_norm); auto var = std::make_shared(element::f32, shape_norm); auto conv = std::make_shared(input, weights, Strides{1, 1}, Strides{1, 1}); - auto bn = std::make_shared(eps, gamma, beta, conv, mean, var); + auto bn = std::make_shared(conv, gamma, beta, mean, var, eps); auto f = make_shared(NodeVector{bn}, op::ParameterVector{input, weights, gamma, beta, mean, var}); return f; @@ -1829,7 +1829,7 @@ TEST(cpu_fusion, convbias_batch_norm_folding) auto conv = std::make_shared(input, weights, Strides{1, 1}, Strides{1, 1}); auto convbias = conv + std::make_shared(bias, conv->get_shape(), AxisSet{0, 2, 3}); - auto bn = std::make_shared(eps, gamma, beta, convbias, mean, var); + auto bn = std::make_shared(convbias, gamma, beta, mean, var, eps); auto f = make_shared( NodeVector{bn}, op::ParameterVector{input, weights, bias, gamma, beta, mean, var}); return f; diff --git a/test/cpu_fusion.cpp-41c1ba06 b/test/cpu_fusion.cpp-41c1ba06 new file mode 100644 index 00000000000..e377ab0f432 --- /dev/null +++ b/test/cpu_fusion.cpp-41c1ba06 @@ -0,0 +1,3132 @@ +//***************************************************************************** +// Copyright 2017-2018 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "ngraph/autodiff/adjoints.hpp" +#include "ngraph/file_util.hpp" +#include "ngraph/graph_util.hpp" +#include "ngraph/log.hpp" +#include "ngraph/ngraph.hpp" +#include "ngraph/op/batch_norm.hpp" +#include "ngraph/op/concat.hpp" +#include "ngraph/op/get_output_element.hpp" +#include "ngraph/op/max_pool.hpp" +#include "ngraph/op/negative.hpp" +#include "ngraph/op/parameter.hpp" +#include "ngraph/op/relu.hpp" +#include "ngraph/op/sigmoid.hpp" +#include "ngraph/op/sum.hpp" +#include "ngraph/op/tanh.hpp" +#include "ngraph/pass/algebraic_simplification.hpp" +#include "ngraph/pass/core_fusion.hpp" +#include "ngraph/pass/graph_rewrite.hpp" +#include "ngraph/pass/manager.hpp" +#include "ngraph/pass/reshape_elimination.hpp" +#include "ngraph/pass/visualize_tree.hpp" +#include "ngraph/pattern/matcher.hpp" +#include "ngraph/pattern/op/label.hpp" +#include "ngraph/pattern/op/skip.hpp" +#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp" +#include "ngraph/runtime/cpu/cpu_tensor_view.hpp" +#include "ngraph/runtime/cpu/op/batch_dot.hpp" +#include "ngraph/runtime/cpu/op/batch_norm_relu.hpp" +#include "ngraph/runtime/cpu/op/bounded_relu.hpp" +#include "ngraph/runtime/cpu/op/conv_add.hpp" +#include "ngraph/runtime/cpu/op/conv_bias.hpp" +#include "ngraph/runtime/cpu/op/conv_relu.hpp" +#include "ngraph/runtime/cpu/op/convert_layout.hpp" +#include "ngraph/runtime/cpu/op/group_conv.hpp" +#include "ngraph/runtime/cpu/op/group_conv_bias.hpp" +#include "ngraph/runtime/cpu/op/loop_kernel.hpp" +#include "ngraph/runtime/cpu/op/lstm.hpp" +#include "ngraph/runtime/cpu/op/matmul_bias.hpp" +#include "ngraph/runtime/cpu/op/rnn.hpp" +#include "ngraph/runtime/cpu/op/sigmoid_mul.hpp" +#include 
"ngraph/runtime/cpu/pass/cpu_concat_inputs.hpp" +#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp" +#include "ngraph/runtime/cpu/pass/cpu_loop_kernel_fusion.hpp" +#include "ngraph/runtime/cpu/pass/cpu_mat_fusion.hpp" +#include "ngraph/runtime/cpu/pass/cpu_post_layout_optimizations.hpp" +#include "ngraph/runtime/cpu/pass/cpu_rnn_fusion.hpp" +#include "ngraph/runtime/cpu/pass/cpu_workspace_insertion.hpp" +#include "ngraph/serializer.hpp" +#include "ngraph/util.hpp" +#include "nlohmann/json.hpp" +#include "util/all_close.hpp" +#include "util/autodiff/backprop_function.hpp" +#include "util/autodiff/numeric_compare.hpp" +#include "util/matcher.hpp" +#include "util/random.hpp" +#include "util/random.hpp" +#include "util/test_tools.hpp" + +using namespace ngraph; +using namespace std; + +TEST(cpu_fusion, gemm_pattern) +{ + Shape shape_w{2, 4}; + Shape shape_x{4, 1}; + Shape shape_b{1}; + auto A = make_shared(element::f32, shape_w); + auto B = make_shared(element::f32, shape_x); + auto C = make_shared(element::f32, shape_b); + + auto dot = make_shared(A, B); + auto broadcast = make_shared(C, dot->get_shape(), AxisSet{0}); + auto add = dot + broadcast; + + auto W = std::make_shared(A); + auto x = std::make_shared(B); + + auto reshape_pred = [](std::shared_ptr n) { + return static_cast(std::dynamic_pointer_cast(n)); + }; + + auto skip_w = std::make_shared(W, reshape_pred); + auto skip_x = std::make_shared(x, reshape_pred); + + auto pdot = make_shared(skip_w, skip_x); + auto b = std::make_shared(C); + auto pbroadcast = make_shared(b, dot->get_shape(), AxisSet{0}); + auto padd = pdot + pbroadcast; + + TestMatcher n(nullptr); + ASSERT_TRUE(n.match(padd, add)); + ASSERT_EQ(n.get_pattern_map()[W], A); + ASSERT_EQ(n.get_pattern_map()[x], B); + ASSERT_EQ(n.get_pattern_map()[b], C); + + auto reshape_w = make_shared(A, AxisVector{1, 0}, W->get_shape()); + auto reshape_x = make_shared(B, AxisVector{1, 0}, x->get_shape()); + auto re_dot = make_shared(reshape_w, reshape_x); + auto 
re_add = re_dot + broadcast; + ASSERT_TRUE(n.match(padd, re_add)); + ASSERT_EQ(n.get_pattern_map()[W], A); + ASSERT_EQ(n.get_pattern_map()[x], B); + ASSERT_EQ(n.get_pattern_map()[b], C); + + auto cg = make_shared( + W, x, C, W->get_shape(), x->get_shape(), false, false, AxisSet{0}); +} + +TEST(cpu_fusion, gemm_cpu_broadcast_row) +{ + Shape shapeA{3, 2}; + Shape shapeB{2, 3}; + Shape shapeC{2, 2}; + auto A = make_shared(element::f32, shapeA); + auto B = make_shared(element::f32, shapeB); + + auto bias = op::Constant::create(element::f32, Shape{2}, std::vector{2.0f, 3.0f}); + + auto cg = make_shared( + A, B, bias, A->get_shape(), B->get_shape(), true, true, AxisSet{0}); + + auto f = make_shared(cg, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("CPU"); + + shared_ptr a = backend->create_tensor(element::f32, shapeA); + shared_ptr b = backend->create_tensor(element::f32, shapeB); + shared_ptr result = backend->create_tensor(element::f32, shapeC); + + vector dataA{1.0f, 4.0f, 1.0f, 4.0f, 1.0f, 4.0f}; + vector dataB{3.0f, 3.0f, 3.0f, 9.0f, 9.0f, 9.0f}; + copy_data(a, dataA); + copy_data(b, dataB); + + backend->call_with_validate(f, {result}, {a, b}); + vector expected{11, 30, 38, 111}; + EXPECT_EQ(read_vector(result), expected); +} + +TEST(cpu_fusion, gemm_cpu_broadcast_column) +{ + Shape shapeA{3, 2}; + Shape shapeB{2, 3}; + Shape shapeC{2, 2}; + auto A = make_shared(element::f32, shapeA); + auto B = make_shared(element::f32, shapeB); + + auto bias = op::Constant::create(element::f32, Shape{2}, std::vector{2.0f, 3.0f}); + + auto cg = make_shared( + A, B, bias, A->get_shape(), B->get_shape(), true, true, AxisSet{1}); + + auto f = make_shared(cg, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("CPU"); + + shared_ptr a = backend->create_tensor(element::f32, shapeA); + shared_ptr b = backend->create_tensor(element::f32, shapeB); + shared_ptr result = backend->create_tensor(element::f32, shapeC); + + vector dataA{1.0f, 4.0f, 
1.0f, 4.0f, 1.0f, 4.0f}; + vector dataB{3.0f, 3.0f, 3.0f, 9.0f, 9.0f, 9.0f}; + copy_data(a, dataA); + copy_data(b, dataB); + + backend->call_with_validate(f, {result}, {a, b}); + vector expected{11, 29, 39, 111}; + EXPECT_EQ(read_vector(result), expected); +} + +TEST(cpu_fusion, gemm_cpu_broadcast_matrix) +{ + Shape shapeA{3, 2}; + Shape shapeB{2, 3}; + Shape shapeC{2, 2}; + auto A = make_shared(element::f32, shapeA); + auto B = make_shared(element::f32, shapeB); + + auto reshape_w = make_shared(A, AxisVector{1, 0}, Shape{2, 3}); + auto reshape_x = make_shared(B, AxisVector{1, 0}, Shape{3, 2}); + + auto one = op::Constant::create(element::f32, Shape{}, std::vector{1.0f}); + + auto broadcast = make_shared(one, shapeC, AxisSet{0, 1}); + auto cg = make_shared( + A, B, one, A->get_shape(), B->get_shape(), true, true, AxisSet{0, 1}); + + auto f = make_shared(cg, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("CPU"); + + shared_ptr a = backend->create_tensor(element::f32, shapeA); + shared_ptr b = backend->create_tensor(element::f32, shapeB); + shared_ptr result = backend->create_tensor(element::f32, shapeC); + + vector dataA{1.0f, 4.0f, 1.0f, 4.0f, 1.0f, 4.0f}; + vector dataB{3.0f, 3.0f, 3.0f, 9.0f, 9.0f, 9.0f}; + copy_data(a, dataA); + copy_data(b, dataB); + + backend->call_with_validate(f, {result}, {a, b}); + vector expected{10, 28, 37, 109}; + ASSERT_TRUE(read_vector(result) == expected); +} + +TEST(cpu_fusion, gemm_cpu_no_bias) +{ + auto shapeA = Shape{3, 2}; + auto shapeB = Shape{2, 3}; + auto shapeC = Shape{2, 2}; + auto A = make_shared(element::f32, shapeA); + auto B = make_shared(element::f32, shapeB); + + auto reshape_w = make_shared(A, AxisVector{1, 0}, Shape{2, 3}); + auto reshape_x = make_shared(B, AxisVector{1, 0}, Shape{3, 2}); + + auto cg = + make_shared(A, B, nullptr, A->get_shape(), B->get_shape(), true, true); + + auto f = make_shared(cg, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("CPU"); + + 
shared_ptr a = backend->create_tensor(element::f32, shapeA); + shared_ptr b = backend->create_tensor(element::f32, shapeB); + shared_ptr result = backend->create_tensor(element::f32, shapeC); + + vector dataA{1.0f, 4.0f, 1.0f, 4.0f, 1.0f, 4.0f}; + vector dataB{3.0f, 3.0f, 3.0f, 9.0f, 9.0f, 9.0f}; + copy_data(a, dataA); + copy_data(b, dataB); + + backend->call_with_validate(f, {result}, {a, b}); + vector expected{9, 27, 36, 108}; + ASSERT_TRUE(read_vector(result) == expected); +} + +TEST(cpu_fusion, cpu_fusion_pass_basic) +{ + Shape shape{}; + Shape shape_w{2, 4}; + Shape shape_x{4, 1}; + Shape shape_b{1}; + auto A = make_shared(element::f32, shape_w); + auto B = make_shared(element::f32, shape_x); + auto C = make_shared(element::f32, shape_b); + + auto dot = make_shared(A, B); + auto broadcast = make_shared(C, dot->get_shape(), AxisSet{0}); + auto add = dot + broadcast; + auto graph = make_shared(add); + pass::Manager pass_manager; + pass_manager.register_pass( + runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); + auto func = make_shared(graph, op::ParameterVector{A, B, C}); + pass_manager.run_passes(func); + ASSERT_NE(std::dynamic_pointer_cast(graph->get_argument(0)), nullptr); +} + +TEST(cpu_fusion, commutative_matmul_bias) +{ + Shape shape{}; + Shape shape_w{2, 4}; + Shape shape_x{4, 1}; + Shape shape_b{1}; + auto A = make_shared(element::f32, shape_w); + auto B = make_shared(element::f32, shape_x); + auto C = make_shared(element::f32, shape_b); + + auto dot = make_shared(A, B); + auto broadcast = make_shared(C, dot->get_shape(), AxisSet{0}); + auto add = broadcast + dot; + auto graph = make_shared(add); + pass::Manager pass_manager; + pass_manager.register_pass( + runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); + auto func = make_shared(graph, op::ParameterVector{A, B, C}); + pass_manager.run_passes(func); + ASSERT_NE(std::dynamic_pointer_cast(graph->get_argument(0)), nullptr); +} + +TEST(cpu_fusion, cpu_fusion_pass_matmul_bias) +{ + Shape shape_w{2, 4}; + 
Shape shape_x{4, 1}; + Shape shape_b{1}; + auto W = make_shared(element::f32, shape_w); + auto x = make_shared(element::f32, shape_x); + auto b = make_shared(element::f32, shape_b); + + auto mmb = std::make_shared( + W, x, nullptr, W->get_shape(), x->get_shape(), false, false); + auto broadcast = std::make_shared(b, mmb->get_shape(), AxisSet{0}); + auto add = mmb + broadcast; + + auto graph = make_shared(add); + pass::Manager pass_manager; + pass_manager.register_pass( + runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); + auto func = make_shared(graph, op::ParameterVector{W, x, b}); + pass_manager.run_passes(func); + auto gmm = graph->get_argument(0); + ASSERT_TRUE(std::dynamic_pointer_cast(gmm)); + ASSERT_EQ(gmm->get_argument(2), b); +} + +TEST(cpu_fusion, cpu_fusion_pass_matmul_no_bias) +{ + Shape shape_w{4, 2}; + Shape shape_x{1, 4}; + auto W = make_shared(element::f32, shape_w); + auto x = make_shared(element::f32, shape_x); + + auto reshape_w = std::make_shared(W, AxisVector{1, 0}, Shape{2, 4}); + auto reshape_x = std::make_shared(x, AxisVector{1, 0}, Shape{4, 1}); + auto re_dot = make_shared(reshape_w, reshape_x); + auto graph = make_shared(re_dot); + + pass::Manager pass_manager; + pass_manager.register_pass( + runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); + auto func = make_shared(graph, op::ParameterVector{W, x}); + pass_manager.run_passes(func); + size_t mmb = count_ops_of_type(func); + ASSERT_EQ(mmb, 1); +} + +TEST(cpu_fusion, gemm_mlp) +{ + const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json"); + const string json_string = file_util::read_file_to_string(json_path); + stringstream ss(json_string); + shared_ptr func = ngraph::deserialize(ss); + pass::Manager pass_manager; + pass_manager.register_pass( + runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); + pass_manager.run_passes(func); + auto mmbs = count_ops_of_type(func); + ASSERT_EQ(mmbs, 3); +} + +TEST(cpu_fusion, fuse_fprop_bn) +{ + pass::Manager pass_manager; 
+ pass_manager.register_pass("bn_fprop_before_fusion.png"); + pass_manager.register_pass(); + pass_manager.register_pass( + runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); + pass_manager.register_pass("bn_fprop_after_fusion.png"); + const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/bn_fprop_b2c3h2w2.json"); + const string json_string = file_util::read_file_to_string(json_path); + stringstream ss(json_string); + shared_ptr func = ngraph::deserialize(ss); + pass_manager.run_passes(func); + size_t ccg = count_ops_of_type(func); + ASSERT_EQ(ccg, 1); +} + +TEST(cpu_fusion, zero_padded_reshaped_conv) +{ + auto X = make_shared(element::f32, Shape{1, 2, 2, 1}); + auto F = make_shared(element::f32, Shape{1, 1, 1, 1}); + + auto pad_value = op::Constant::create(element::f32, Shape{}, std::vector{0.0f}); + + auto pad = + make_shared(X, pad_value, Shape{0, 1, 0, 0}, Shape{0, 0, 1, 0}, Shape{0, 0, 0, 0}); + + auto reshape = make_shared(pad, AxisVector{0, 3, 1, 2}, Shape{1, 1, 3, 3}); + + auto conv = make_shared(reshape, + F, + Strides{1, 1}, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + + auto func = make_shared(conv, op::ParameterVector{X, F}); + + ASSERT_EQ(count_ops_of_type(func), 1); + + auto backend = runtime::Backend::create("CPU"); + backend->compile(func); + + ASSERT_EQ(count_ops_of_type(func), 0); +} + +TEST(cpu_fusion, zero_padded_conv) +{ + auto X = make_shared(element::f32, Shape{1, 1, 2, 2}); + auto F = make_shared(element::f32, Shape{1, 1, 1, 1}); + + auto pad_value = op::Constant::create(element::f32, Shape{}, std::vector{0.0f}); + + auto pad = + make_shared(X, pad_value, Shape{0, 0, 0, 1}, Shape{0, 0, 1, 0}, Shape{0, 0, 0, 0}); + + auto conv = make_shared(pad, + F, + Strides{1, 1}, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + + auto func = make_shared(conv, op::ParameterVector{X, F}); + + ASSERT_EQ(count_ops_of_type(func), 1); + + auto backend = 
runtime::Backend::create("CPU"); + backend->compile(func); + + ASSERT_EQ(count_ops_of_type(func), 0); +} + +TEST(cpu_fusion, non_zero_padded_conv) +{ + auto X = make_shared(element::f32, Shape{1, 1, 2, 2}); + auto F = make_shared(element::f32, Shape{1, 1, 1, 1}); + + auto pad_value = op::Constant::create(element::f32, Shape{}, std::vector{1.0f}); + + auto pad = + make_shared(X, pad_value, Shape{0, 0, 0, 1}, Shape{0, 0, 1, 0}, Shape{0, 0, 0, 0}); + + auto conv = make_shared(pad, + F, + Strides{1, 1}, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + + auto func = make_shared(conv, op::ParameterVector{X, F}); + + ASSERT_EQ(count_ops_of_type(func), 1); + + auto backend = runtime::Backend::create("CPU"); + backend->compile(func); + + ASSERT_EQ(count_ops_of_type(func), 1); +} + +TEST(cpu_fusion, zero_padded_conv_backprop_filters) +{ + auto X = make_shared(element::f32, Shape{1, 1, 2, 2}); + auto F = make_shared(element::f32, Shape{1, 1, 2, 2}); + + auto pad_value = op::Constant::create(element::f32, Shape{}, std::vector{0.0f}); + + auto pad = + make_shared(X, pad_value, Shape{0, 0, 0, 1}, Shape{0, 0, 1, 0}, Shape{0, 0, 0, 0}); + + auto conv = make_shared(pad, + Shape{1, 1, 2, 2}, + F, + Strides{1, 1}, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + + auto func = make_shared(conv, op::ParameterVector{X, F}); + + ASSERT_EQ(count_ops_of_type(func), 1); + + auto backend = runtime::Backend::create("CPU"); + backend->compile(func); + + ASSERT_EQ(count_ops_of_type(func), 0); +} + +TEST(cpu_fusion, fuse_conv_bias) +{ + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass( + runtime::cpu::pass::CPUFusion::DIFFERENTIABLE_FUSIONS); + const string json_path = file_util::path_join(SERIALIZED_ZOO, "conv_bias.json"); + const string json_string = file_util::read_file_to_string(json_path); + stringstream ss(json_string); + shared_ptr func = ngraph::deserialize(ss); + 
pass_manager.run_passes(func); + size_t cb = count_ops_of_type(func); + ASSERT_GT(cb, 0); +} + +struct ConvolutionBiasTestData +{ + size_t n{0}; + size_t c{0}; + size_t filter{0}; + size_t kernel_size{0}; + size_t w{0}; + size_t h{0}; + shared_ptr data_val; + shared_ptr weights_val; + shared_ptr bias_val; + shared_ptr result_val; + shared_ptr delta_val; + shared_ptr d_data_val; + shared_ptr d_weights_val; + shared_ptr d_bias_val; + vector expected_result_val; + vector expected_d_data_val; + vector expected_d_weights_val; + vector expected_d_bias_val; + + Shape data_shape; + Shape weights_shape; + Shape bias_shape; + Shape result_shape; + shared_ptr data; + shared_ptr weights; + shared_ptr bias; + shared_ptr delta; + + void n1c1h3w3(runtime::Backend* backend) + { + n = 1; + c = 1; + filter = 1; + kernel_size = 3; + w = 3; + h = w; + + data_shape = Shape{n, c, h, w}; + data = make_shared(element::f32, data_shape); + weights_shape = Shape{filter, c, kernel_size, kernel_size}; + weights = make_shared(element::f32, weights_shape); + bias_shape = Shape{filter}; + bias = make_shared(element::f32, bias_shape); + result_shape = Shape{n, filter, 1, 1}; + + data_val = backend->create_tensor(element::f32, data_shape); + copy_data(data_val, + vector{-0.67765152f, + 0.10073948f, + 0.57595438f, + -0.3469252f, + -0.22134334f, + -1.80471897f, + -0.80642909f, + 1.22033095f, + 2.23235631f}); + weights_val = backend->create_tensor(element::f32, weights_shape); + copy_data(weights_val, + vector{0.20070229f, + -0.54968649f, + -0.19819015f, + -0.38577855f, + 1.37109005f, + -0.23789984f, + 0.14867957f, + -0.49851316f, + -0.84815776f}); + bias_val = backend->create_tensor(element::f32, bias_shape); + copy_data(bias_val, vector{0.07811152f}); + + result_val = backend->create_tensor(element::f32, result_shape); + copy_data(result_val, vector{0}); + + delta = make_shared(element::f32, result_shape); + delta_val = backend->create_tensor(element::f32, result_shape); + copy_data(delta_val, 
vector{-2.58936238f}); + + d_data_val = backend->create_tensor(element::f32, data_shape); + copy_data(d_data_val, vector{0, 0, 0, 0, 0, 0, 0, 0, 0}); + + d_weights_val = backend->create_tensor(element::f32, weights_shape); + copy_data(d_weights_val, vector{0, 0, 0, 0, 0, 0, 0, 0, 0}); + + d_bias_val = backend->create_tensor(element::f32, bias_shape); + copy_data(d_bias_val, vector{0}); + + expected_result_val = vector{-2.58936238f}; + expected_d_data_val = vector{-0.51969099f, + 1.42333758f, + 0.5131861f, + 0.99892044f, + -3.5502491f, + 0.61600888f, + -0.3849853f, + 1.29083121f, + 2.19618773f}; + expected_d_weights_val = vector{1.7546854f, + -0.26085103f, + -1.49135458f, + 0.89831507f, + 0.57313812f, + 4.67307138f, + 2.08813715f, + -3.15987897f, + -5.7803793f}; + expected_d_bias_val = vector{-2.58936238f}; + } +}; + +TEST(cpu_fusion, conv_bias_fprop_n1c1h3w3) +{ + auto backend = runtime::Backend::create("CPU"); + + ConvolutionBiasTestData conv_test; + conv_test.n1c1h3w3(backend.get()); + + auto convolution = make_shared(conv_test.data, conv_test.weights); + auto convolution_bias = make_shared(convolution, conv_test.bias); + + auto f = make_shared( + convolution_bias, op::ParameterVector{conv_test.data, conv_test.weights, conv_test.bias}); + + backend->call_with_validate( + f, {conv_test.result_val}, {conv_test.data_val, conv_test.weights_val, conv_test.bias_val}); + auto result_vec = read_vector(conv_test.result_val); + + EXPECT_TRUE( + test::all_close(conv_test.expected_result_val, read_vector(conv_test.result_val))); +} + +TEST(cpu_fusion, conv_bias_bprop_n1c1h3w3) +{ + auto backend = runtime::Backend::create("CPU"); + + ConvolutionBiasTestData conv_test; + conv_test.n1c1h3w3(backend.get()); + + auto convolution = make_shared(conv_test.data, conv_test.weights); + auto convolution_bias = make_shared(convolution, conv_test.bias); + + auto f = make_shared( + convolution_bias, op::ParameterVector{conv_test.data, conv_test.weights, conv_test.bias}); + + 
ngraph::autodiff::Adjoints adjoints(NodeVector{convolution_bias}, NodeVector{conv_test.delta}); + + auto d_data = adjoints.backprop_node(conv_test.data); + auto d_weights = adjoints.backprop_node(conv_test.weights); + auto d_bias = adjoints.backprop_node(conv_test.bias); + + auto df = make_shared( + NodeVector{d_data, d_weights, d_bias}, + op::ParameterVector{conv_test.data, conv_test.weights, conv_test.bias, conv_test.delta}); + backend->call_with_validate( + df, + {conv_test.d_data_val, conv_test.d_weights_val, conv_test.d_bias_val}, + {conv_test.data_val, conv_test.weights_val, conv_test.bias_val, conv_test.delta_val}); + + EXPECT_TRUE( + test::all_close(conv_test.expected_d_data_val, read_vector(conv_test.d_data_val))); + EXPECT_TRUE(test::all_close(conv_test.expected_d_weights_val, + read_vector(conv_test.d_weights_val))); + EXPECT_TRUE( + test::all_close(conv_test.expected_d_bias_val, read_vector(conv_test.d_bias_val))); +} + +TEST(cpu_fusion, conv_bias_bprop) +{ + Shape shape{2, 2, 1, 1}; + auto data_batch = std::make_shared(element::f32, shape); + auto filters = std::make_shared(element::f32, shape); + auto delta = std::make_shared(element::f32, shape); + auto bias = make_shared(element::f32, Shape{shape[0]}); + auto pbroadcast = std::make_shared(bias, shape, AxisSet{1, 2, 3}); + auto conv = std::make_shared(data_batch, filters); + auto conv_bias = std::make_shared(conv, pbroadcast); + + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass("conv_bias_bprop_fusion"); + auto f = make_shared(conv_bias, op::ParameterVector{data_batch, filters, bias}); + + ngraph::autodiff::Adjoints adjoints(NodeVector{conv_bias}, NodeVector{delta}); + + auto d_data = adjoints.backprop_node(data_batch); + auto d_weights = adjoints.backprop_node(filters); + auto d_bias = adjoints.backprop_node(bias); + + auto df = make_shared(NodeVector{d_data, d_weights, d_bias}, + op::ParameterVector{data_batch, filters, bias, delta}); + + 
pass_manager.run_passes(df); + size_t ccg = count_ops_of_type(df); + ASSERT_EQ(ccg, 1); +} + +TEST(cpu_fusion, batchnorm_fprop_relu_b1c2h2w2) +{ + auto input_shape = Shape{1, 2, 2, 2}; + auto input = make_shared(element::f32, input_shape); + auto mean_shape = Shape{2}; + auto var_shape = Shape{2}; + auto gamma_shape = Shape{2}; + auto gamma = make_shared(element::f32, gamma_shape); + auto beta_shape = Shape{2}; + auto beta = make_shared(element::f32, beta_shape); + double eps = 0.001; + auto shape_r = Shape{1, 2, 2, 2}; + auto bn = make_shared(input, gamma, beta, eps); + + auto output_rt = std::make_shared(bn, 0); + // Note, op::Splice is used to break Relu(BatchNorm) fusion + // otherwise we will be comparing two BatchNormRelus + // Unfortunately, we can't use INTERPRETER for + // verifying the results as it doesn't implement + // BatchNorm op. + auto slice = + std::make_shared(output_rt, Coordinate{0, 0, 0, 0}, Coordinate{1, 2, 2, 2}); + auto output_relu = std::make_shared(slice); + auto mean_rt = std::make_shared(bn, 1); + auto variance_rt = std::make_shared(bn, 2); + + auto bn_relu = make_shared(input, gamma, beta, eps); + auto output_rt_bnr = std::make_shared(bn_relu, 0); + auto mean_rt_bnr = std::make_shared(bn_relu, 1); + auto variance_rt_bnr = std::make_shared(bn_relu, 2); + + auto f = make_shared( + NodeVector{output_relu, mean_rt, variance_rt, output_rt_bnr, mean_rt_bnr, variance_rt_bnr}, + op::ParameterVector{input, gamma, beta}); + auto backend = runtime::Backend::create("CPU"); + + // Create some tensors for input/output + auto input_t = backend->create_tensor(element::f32, Shape{1, 2, 2, 2}); + + copy_data(input_t, + vector{0.54881352f, + 0.71518934f, + 0.60276335f, + 0.54488319f, + 0.42365479f, + 0.64589411f, + 0.4375872f, + 0.89177299f}); + auto gamma_t = backend->create_tensor(element::f32, gamma_shape); + copy_data(gamma_t, vector{1.0f, 1.0f}); + auto beta_t = backend->create_tensor(element::f32, beta_shape); + copy_data(beta_t, vector{0.0f, 
0.0f}); + auto bn_output = backend->create_tensor(element::f32, shape_r); + auto result_mean = backend->create_tensor(element::f32, mean_shape); + auto result_variance = backend->create_tensor(element::f32, var_shape); + + auto bn_output_bnr = backend->create_tensor(element::f32, shape_r); + auto result_mean_bnr = backend->create_tensor(element::f32, mean_shape); + auto result_variance_bnr = backend->create_tensor(element::f32, var_shape); + + backend->call_with_validate(f, + {bn_output, + result_mean, + result_variance, + bn_output_bnr, + result_mean_bnr, + result_variance_bnr}, + {input_t, gamma_t, beta_t}); + + EXPECT_TRUE(test::all_close(read_vector(bn_output), read_vector(bn_output_bnr))); + EXPECT_TRUE( + test::all_close(read_vector(result_mean), read_vector(result_mean_bnr))); + EXPECT_TRUE(test::all_close(read_vector(result_variance), + read_vector(result_variance_bnr))); +} + +TEST(cpu_fusion, fuse_conv_relu) +{ + auto A = std::make_shared(element::f32, Shape{2, 1, 2, 2}); + auto weights = std::make_shared(element::f32, Shape{1, 1, 2, 2}); + auto convolution = std::make_shared(A, weights, Strides{1, 1}, Strides{1, 1}); + auto relu = std::make_shared(convolution); + auto abs_node = + std::make_shared(std::make_shared(std::make_shared(relu))); + auto func = make_shared(abs_node, op::ParameterVector{A, weights}); + + pass::Manager pass_manager; + pass_manager.register_pass( + runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); + pass_manager.run_passes(func); + size_t cb = count_ops_of_type(func); + ASSERT_GT(cb, 0); +} + +TEST(cpu_fusion, conv_relu_n2c1h2w2_2) +{ + Shape shape_a{2, 1, 6, 6}; + Shape shape_weights{1, 1, 2, 2}; + + auto make_int_function = [shape_a, shape_weights]() { + auto A = std::make_shared(element::f32, shape_a); + auto weights = std::make_shared(element::f32, shape_weights); + auto conv = std::make_shared(A, weights, Strides{2, 2}, Strides{1, 1}); + auto relu = std::make_shared(conv); + auto f = make_shared(NodeVector{relu}, 
op::ParameterVector{A, weights}); + return f; + }; + + auto int_f = make_int_function(); + + auto make_cpu_function = [shape_a, shape_weights]() { + auto A = std::make_shared(element::f32, shape_a); + auto weights = std::make_shared(element::f32, shape_weights); + auto conv = std::make_shared(A, weights, Strides{2, 2}, Strides{1, 1}); + auto conv_relu = std::make_shared(conv); + auto f = make_shared(NodeVector{conv_relu}, op::ParameterVector{A, weights}); + return f; + }; + + auto cpu_f = make_cpu_function(); + + vector> args{ + {1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f, 7.25f, 8.25f, -1.25f, + -1.25f, 1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f, -4.25f, 2.25f, + 4.25f, 4.25f, 0.f, 0.f, -1.f, 0.f, 2.f, 2.f, 0.f, 0.f, 0.f, + 0.f, 2.f, 2.f, 1.25f, 2.25f, 5.25f, 6.25f, 1.25f, 1.25f, 3.25f, 4.25f, + -7.25f, 8.25f, 1.25f, -1.25f, -1.25f, 2.25f, 3.25f, 2.25f, -4.25f, -4.25f, -1.25f, + -2.25f, 4.25f, 2.25f, 4.25f, 4.25f, 0.f, 0.f, 1.f, 0.f, -2.f, 2.f, + 0.f, 0.f, 0.f, 0.f, -2.f, -2.f}, + {2., 2., 2., 2.}}; + + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); +} + +TEST(cpu_fusion, conv_bias_relu_n2c1h2w2_2) +{ + Shape shape_a{2, 1, 6, 6}; + Shape shape_weights{1, 1, 2, 2}; + Shape shape_bias{1}; + + auto make_int_function = [shape_a, shape_weights, shape_bias]() { + auto A = std::make_shared(element::f32, shape_a); + auto weights = std::make_shared(element::f32, shape_weights); + auto conv = std::make_shared(A, weights, Strides{2, 2}, Strides{1, 1}); + auto bias = std::make_shared(element::f32, shape_bias); + auto conv_bias = + conv + std::make_shared(bias, conv->get_shape(), AxisSet{0, 2, 3}); + auto relu = std::make_shared(conv_bias); + auto f = make_shared(NodeVector{relu}, op::ParameterVector{A, weights, bias}); + return f; + }; + + auto int_f = make_int_function(); + + auto make_cpu_function = [shape_a, 
shape_weights, shape_bias]() { + auto A = std::make_shared(element::f32, shape_a); + auto weights = std::make_shared(element::f32, shape_weights); + auto bias = std::make_shared(element::f32, shape_bias); + auto conv = std::make_shared(A, weights, Strides{2, 2}, Strides{1, 1}); + auto conv_bias_relu = std::make_shared(conv, bias, true); + auto f = make_shared(NodeVector{conv_bias_relu}, + op::ParameterVector{A, weights, bias}); + return f; + }; + + auto cpu_f = make_cpu_function(); + + vector> args{ + {1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f, 7.25f, 8.25f, -1.25f, + -1.25f, 1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f, -4.25f, 2.25f, + 4.25f, 4.25f, 0.f, 0.f, -1.f, 0.f, 2.f, 2.f, 0.f, 0.f, 0.f, + 0.f, 2.f, 2.f, 1.25f, 2.25f, 5.25f, 6.25f, 1.25f, 1.25f, 3.25f, 4.25f, + -7.25f, 8.25f, 1.25f, -1.25f, -1.25f, 2.25f, 3.25f, 2.25f, -4.25f, -4.25f, -1.25f, + -2.25f, 4.25f, 2.25f, 4.25f, 4.25f, 0.f, 0.f, 1.f, 0.f, -2.f, 2.f, + 0.f, 0.f, 0.f, 0.f, -2.f, -2.f}, + {2., 2., 2., 2.}, + {0.1f}}; + + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); +} + +TEST(cpu_fusion, conv_horizontal_fusion) +{ + Shape shape_a{2, 1, 6, 6}; + Shape shape_weights{1, 1, 2, 2}; + Shape shape_bias{1}; + + auto make_function = [shape_a, shape_weights, shape_bias]() { + auto A = std::make_shared(element::f32, shape_a); + auto weights1 = std::make_shared(element::f32, shape_weights); + auto conv1 = std::make_shared(A, weights1, Strides{2, 2}, Strides{1, 1}); + auto bias1 = std::make_shared(element::f32, shape_bias); + auto conv_bias1 = + conv1 + std::make_shared(bias1, conv1->get_shape(), AxisSet{0, 2, 3}); + auto relu1 = std::make_shared(conv_bias1); + + auto weights2 = std::make_shared(element::f32, shape_weights); + auto conv2 = std::make_shared(A, weights2, Strides{2, 2}, Strides{1, 1}); + auto bias2 = std::make_shared(element::f32, 
shape_bias); + auto conv_bias2 = + conv2 + std::make_shared(bias2, conv2->get_shape(), AxisSet{0, 2, 3}); + auto relu2 = std::make_shared(conv_bias2); + + auto concat = std::make_shared(NodeVector{relu1, relu2}, 1); + auto f = make_shared(NodeVector{concat}, + op::ParameterVector{A, weights1, bias1, weights2, bias2}); + return f; + }; + auto int_f = make_function(); + auto cpu_f = make_function(); + + vector> args{ + {1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f, 7.25f, 8.25f, -1.25f, + -1.25f, 1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f, -4.25f, 2.25f, + 4.25f, 4.25f, 0.f, 0.f, -1.f, 0.f, 2.f, 2.f, 0.f, 0.f, 0.f, + 0.f, 2.f, 2.f, 1.25f, 2.25f, 5.25f, 6.25f, 1.25f, 1.25f, 3.25f, 4.25f, + -7.25f, 8.25f, 1.25f, -1.25f, -1.25f, 2.25f, 3.25f, 2.25f, -4.25f, -4.25f, -1.25f, + -2.25f, 4.25f, 2.25f, 4.25f, 4.25f, 0.f, 0.f, 1.f, 0.f, -2.f, 2.f, + 0.f, 0.f, 0.f, 0.f, -2.f, -2.f}, + {2., 2., 2., 2.}, + {0.1f}, + {3., 3., 3., 3.}, + {0.2f}}; + + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); + + size_t cpu_cb = count_ops_of_type(cpu_f); + ASSERT_EQ(cpu_cb, 1); +} + +// ConvolutionBiasAdd relies on an in-place fused MKLDNN kernel. +// Need to ensure that it is fused only when in-place buffer allocation is feasible +shared_ptr gen_conv_bias_add(bool param_input, bool result_output) +{ + auto A = make_shared(element::f32, Shape{2, 1, 2, 2}); + auto weights = make_shared(element::f32, Shape{1, 1, 1, 1}); + auto bias = make_shared(element::f32, Shape{1}); + auto conv = make_shared(A, weights, Strides{1, 1}, Strides{1, 1}); + auto bias_broadcast = make_shared(bias, conv->get_shape(), AxisSet{0, 2, 3}); + auto convbias = conv + bias_broadcast; + auto B = make_shared(element::f32, Shape{2, 1, 2, 2}); + auto abs_B = make_shared(B); + auto add = + param_input ? 
make_shared(convbias, B) : make_shared(convbias, abs_B); + auto abs = make_shared(add); + + return result_output ? make_shared(add, op::ParameterVector{A, weights, bias, B}) + : make_shared(abs, op::ParameterVector{A, weights, bias, B}); +} + +TEST(cpu_fusion, fuse_conv_bias_add) +{ + auto func_fuse = gen_conv_bias_add(false, false); + auto func_nofuse1 = gen_conv_bias_add(true, false); + auto func_nofuse2 = gen_conv_bias_add(false, true); + + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.run_passes(func_fuse); + ASSERT_EQ(count_ops_of_type(func_fuse), 1); + + pass_manager.run_passes(func_nofuse1); + ASSERT_EQ(count_ops_of_type(func_nofuse1), 0); + + pass_manager.run_passes(func_nofuse2); + ASSERT_EQ(count_ops_of_type(func_nofuse2), 1); +} + +TEST(cpu_fusion, conv_bias_add) +{ + auto int_f = gen_conv_bias_add(false, false); + auto cpu_f = gen_conv_bias_add(false, false); + + vector> args{{1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f}, + {-1.25f}, + {2.25f}, + {1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f}}; + + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); +} + +// ConvolutionAdd relies on an in-place fused MKLDNN kernel. +// Need to ensure that it is fused only when in-place buffer allocation is feasible +shared_ptr gen_conv_add(bool param_input, bool result_output) +{ + auto A = make_shared(element::f32, Shape{2, 1, 2, 2}); + auto weights = make_shared(element::f32, Shape{1, 1, 1, 1}); + auto conv = make_shared(A, weights, Strides{1, 1}, Strides{1, 1}); + auto B = make_shared(element::f32, Shape{2, 1, 2, 2}); + auto abs_B = make_shared(B); + auto add = param_input ? make_shared(conv, B) : make_shared(conv, abs_B); + auto abs = make_shared(add); + + return result_output ? 
make_shared(add, op::ParameterVector{A, weights, B}) + : make_shared(abs, op::ParameterVector{A, weights, B}); +} + +TEST(cpu_fusion, fuse_conv_add) +{ + auto func_fuse = gen_conv_add(false, false); + auto func_nofuse1 = gen_conv_add(true, false); + auto func_nofuse2 = gen_conv_add(false, true); + + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.run_passes(func_fuse); + ASSERT_EQ(count_ops_of_type(func_fuse), 1); + + pass_manager.run_passes(func_nofuse1); + ASSERT_EQ(count_ops_of_type(func_nofuse1), 0); + + pass_manager.run_passes(func_nofuse2); + ASSERT_EQ(count_ops_of_type(func_nofuse2), 1); +} + +TEST(cpu_fusion, conv_add) +{ + auto int_f = gen_conv_add(false, false); + auto cpu_f = gen_conv_add(false, false); + + vector> args{{1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f}, + {-1.25f}, + {1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f}}; + + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); + + int_f = gen_conv_add(false, true); + cpu_f = gen_conv_add(false, true); + + int_results = execute(int_f, args, "INTERPRETER"); + cpu_results = execute(cpu_f, args, "CPU"); + EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); +} + +shared_ptr gen_groupconv_batchnorm(const bool add_goe, + const bool with_relu, + const Shape shape_in, + const Shape shape_weights, + const Shape shape_out, + const size_t groups) +{ + auto input = make_shared(element::f32, shape_in); + auto weights = make_shared(element::f32, shape_weights); + + unsigned long OC = shape_out.at(1); + Shape shape_bn{OC}; + auto group_conv = make_shared(input, + weights, + Strides{1, 1}, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}, + groups, + shape_out); + + double eps = 0.001; + auto gamma = std::make_shared(element::f32, shape_bn); + auto beta = std::make_shared(element::f32, 
shape_bn); + auto mean = std::make_shared(element::f32, shape_bn); + auto var = std::make_shared(element::f32, shape_bn); + + auto goe_bn = std::make_shared(group_conv, 0); + + // Adding a goe will stop fusion since the patterns wont expect to see this op + auto bn = + add_goe ? std::make_shared(goe_bn, gamma, beta, mean, var, eps) + : std::make_shared(group_conv, gamma, beta, mean, var, eps); + if (with_relu) + { + auto prelu = std::make_shared(bn); + auto f = make_shared(NodeVector{prelu}, + op::ParameterVector{input, weights, gamma, beta, mean, var}); + return f; + } + else + { + auto f = make_shared(NodeVector{bn}, + op::ParameterVector{input, weights, gamma, beta, mean, var}); + return f; + } +} + +void fuse_groupconv_batchnorm_helper(Shape shape_in, + Shape shape_weights, + Shape shape_r, + size_t groups) +{ + auto func_fuse = + gen_groupconv_batchnorm(false, false, shape_in, shape_weights, shape_r, groups); + auto func_fuse2 = + gen_groupconv_batchnorm(false, true, shape_in, shape_weights, shape_r, groups); + + { + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.run_passes(func_fuse); + ASSERT_EQ(count_ops_of_type(func_fuse), 1); + } + + { + // test groupconv + batchnorm + relu fusion + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.run_passes(func_fuse2); + ASSERT_EQ(count_ops_of_type(func_fuse2), 1); + ASSERT_EQ(count_ops_of_type(func_fuse2), 0); + } +} + +void groupconv_batchnorm_test_val_helper( + const bool with_relu, Shape shape_in, Shape shape_weights, Shape shape_r, size_t groups) +{ + shared_ptr fuse_func = + gen_groupconv_batchnorm(false, with_relu, shape_in, shape_weights, shape_r, groups); + shared_ptr nofuse_func = + gen_groupconv_batchnorm(true, with_relu, shape_in, shape_weights, shape_r, groups); + + test::Uniform rng(1.0f, 100.0f); + vector> args; + for (shared_ptr param : fuse_func->get_parameters()) + { + vector tensor_val(shape_size(param->get_shape())); + rng.initialize(tensor_val); 
+ args.push_back(tensor_val); + } + + auto fuse_results = execute(fuse_func, args, "CPU"); + auto nofuse_results = execute(nofuse_func, args, "CPU"); + + EXPECT_TRUE(test::all_close(fuse_results.at(0), nofuse_results.at(0))); +} + +TEST(cpu_fusion, fuse_groupconv_batchnorm1) +{ + Shape shape_in{1, 20, 5, 5}; + Shape shape_weights{8, 10, 3, 3}; + Shape shape_r{1, 8, 3, 3}; + fuse_groupconv_batchnorm_helper(shape_in, shape_weights, shape_r, 2); + groupconv_batchnorm_test_val_helper(false, shape_in, shape_weights, shape_r, 2); + groupconv_batchnorm_test_val_helper(true, shape_in, shape_weights, shape_r, 2); +} + +TEST(cpu_fusion, fuse_groupconv_batchnorm2) +{ + Shape shape_in{1, 20, 5, 5}; + Shape shape_weights{5, 4, 3, 3}; + Shape shape_r{1, 5, 3, 3}; + fuse_groupconv_batchnorm_helper(shape_in, shape_weights, shape_r, 5); + groupconv_batchnorm_test_val_helper(false, shape_in, shape_weights, shape_r, 5); + groupconv_batchnorm_test_val_helper(true, shape_in, shape_weights, shape_r, 5); +} + +TEST(cpu_fusion, fuse_groupconv_batchnorm3) +{ + Shape shape_in{1, 20, 5, 5}; + Shape shape_weights{20, 1, 3, 3}; + Shape shape_r{1, 20, 3, 3}; + fuse_groupconv_batchnorm_helper(shape_in, shape_weights, shape_r, 20); + groupconv_batchnorm_test_val_helper(false, shape_in, shape_weights, shape_r, 20); + groupconv_batchnorm_test_val_helper(true, shape_in, shape_weights, shape_r, 20); +} + +TEST(cpu_fusion, fuse_groupconv_batchnorm4) +{ + Shape shape_in{1, 20, 4, 4}; + Shape shape_weights{5, 20, 1, 1}; + Shape shape_r{1, 5, 4, 4}; + fuse_groupconv_batchnorm_helper(shape_in, shape_weights, shape_r, 1); + groupconv_batchnorm_test_val_helper(false, shape_in, shape_weights, shape_r, 1); + groupconv_batchnorm_test_val_helper(true, shape_in, shape_weights, shape_r, 1); +} + +std::vector> rnn_matrix_fusion_eval(const size_t time_steps, + const Shape& data_shape, + const Shape& weights_shape, + const Shape& bias_shape, + const vector& data_val, + const vector& weights_val, + const vector& 
bias_val, + const bool enable_pass) +{ + auto data = make_shared(element::f32, data_shape); + auto weights = make_shared(element::f32, weights_shape); + auto bias = make_shared(element::f32, bias_shape); + + // results from each time step + NodeVector results; + for (size_t t = 0; t < time_steps; ++t) + { + auto data_slice = make_shared( + data, Coordinate{0, t, 0}, Coordinate{data_shape[0], t + 1, data_shape[2]}); + auto data_reshape = make_shared( + data_slice, AxisVector{0, 1, 2}, Shape{data_shape[0], data_shape[2]}); + auto weights_reshape = make_shared( + weights, AxisVector{1, 0}, Shape{weights_shape[1], weights_shape[0]}); + auto dot = make_shared(data_reshape, weights_reshape); + auto bias_broadcast = make_shared(bias, dot->get_shape(), AxisSet{0}); + auto add = make_shared(dot, bias_broadcast); + results.push_back(add); + } + auto func = make_shared(results, op::ParameterVector{data, weights, bias}); + if (enable_pass) + { + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass( + runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); + pass_manager.run_passes(func); + // check all of our dot/add are converted to a single MatmulBias op. 
+ size_t count = count_ops_of_type(func); + EXPECT_EQ(count, 1); + } + + auto backend = runtime::Backend::create("CPU"); + + shared_ptr data_tensor = + backend->create_tensor(element::f32, data->get_shape()); + shared_ptr weights_tensor = + backend->create_tensor(element::f32, weights->get_shape()); + shared_ptr bias_tensor = + backend->create_tensor(element::f32, bias->get_shape()); + + std::vector> result_tensors; + for (auto r : results) + { + result_tensors.push_back(backend->create_tensor(element::f32, r->get_shape())); + } + + copy_data(data_tensor, data_val); + copy_data(weights_tensor, weights_val); + copy_data(bias_tensor, bias_val); + backend->call_with_validate(func, result_tensors, {data_tensor, weights_tensor, bias_tensor}); + return result_tensors; +} + +TEST(cpu_fusion, rnn_matrix_fusion_eval_pass) +{ + const size_t time_steps = 4; + Shape data_shape{3, time_steps, 5}; + Shape weights_shape{6, data_shape[2]}; + Shape bias_shape{6}; + + test::Uniform rng{0, 1, 0}; + vector data_val(shape_size(data_shape)); + vector weights_val(shape_size(weights_shape)); + vector bias_val(shape_size(bias_shape)); + rng.initialize(data_val); + rng.initialize(weights_val); + rng.initialize(bias_val); + + std::vector> result_expected = rnn_matrix_fusion_eval( + time_steps, data_shape, weights_shape, bias_shape, data_val, weights_val, bias_val, false); + std::vector> result_fused = rnn_matrix_fusion_eval( + time_steps, data_shape, weights_shape, bias_shape, data_val, weights_val, bias_val, true); + for (size_t i = 0; i < result_expected.size(); ++i) + { + EXPECT_TRUE(test::all_close(result_expected[i], result_fused[i])); + } +} + +TEST(cpu_fusion, rnn_fusion_from_json_model) +{ + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass( + runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); + const string json_path = + file_util::path_join(SERIALIZED_ZOO, "mxnet/rnn-10-step-fusion-test.json"); + const string json_string = 
file_util::read_file_to_string(json_path); + stringstream ss(json_string); + shared_ptr func = ngraph::deserialize(ss); + pass_manager.run_passes(func); + const size_t NUM_STEPS = 10; + auto mmb_predicate = [](std::shared_ptr node) { + auto users = node->get_users(); + return users.size() == NUM_STEPS && + std::all_of(begin(users), end(users), [](std::shared_ptr n) { + return std::dynamic_pointer_cast(n) != nullptr; + }); + }; + + auto mmbs = get_ops_of_type(func); + ASSERT_TRUE(std::any_of(begin(mmbs), end(mmbs), mmb_predicate)); +} + +TEST(cpu_fusion, weight_fusion) +{ + auto param = std::make_shared(element::f32, Shape{64}); + auto reshape_conv = + std::make_shared(param, AxisVector{0}, Shape{16, 4, 1, 1}); + auto data_conv = std::make_shared(element::f32, Shape{16, 4, 7, 7}); + auto tvt = reshape_conv->get_outputs().at(0).get_tensor_ptr().get(); + auto lt_desc = std::make_shared(*tvt); + auto cvt_lt_conv = std::make_shared(reshape_conv, lt_desc); + auto conv = std::make_shared( + data_conv, cvt_lt_conv, Strides{1, 1}, Strides{1, 1}); + + auto reshape_conv_bprop = + std::make_shared(param, AxisVector{0}, Shape{16, 4, 1, 1}); + auto dummy_arg_conv_bprop = std::make_shared(element::f32, Shape{1, 16, 7, 7}); + auto tvt_bprop = reshape_conv_bprop->get_outputs().at(0).get_tensor_ptr().get(); + auto lt_desc_bprop = std::make_shared(*tvt_bprop); + auto cvt_lt_conv_bprop = + std::make_shared(reshape_conv_bprop, lt_desc_bprop); + auto conv_bprop = std::make_shared(Shape{1, 4, 7, 7}, + cvt_lt_conv_bprop, + dummy_arg_conv_bprop, + Strides{1, 1}, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + + auto conv_relu = std::make_shared(conv); + auto conv_bprop_abs = std::make_shared(conv_bprop); + + auto f = make_shared(NodeVector{conv_relu, conv_bprop_abs}, + op::ParameterVector{param, data_conv, dummy_arg_conv_bprop}); + + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.run_passes(f); + + auto new_conv_bprop_data = 
conv_bprop_abs->get_argument(0); + auto new_convert_layout = new_conv_bprop_data->get_argument(0); + + ASSERT_EQ(std::dynamic_pointer_cast( + new_convert_layout->get_argument(0)), + cvt_lt_conv); +} + +TEST(cpu_fusion, max_pool_with_indices) +{ + Shape shape_a{10, 3, 28, 28}; + auto input = std::make_shared(element::f32, shape_a); + Shape window_shape{2, 2}; + auto max_pool = std::make_shared(input, window_shape); + auto C = std::make_shared(element::f32, max_pool->get_shape()); + + ngraph::autodiff::Adjoints adjoints(NodeVector{max_pool}, NodeVector{C}); + + auto dinput = adjoints.backprop_node(input); + + auto df = std::make_shared(NodeVector{dinput}, op::ParameterVector{input, C}); + + auto f = std::make_shared(NodeVector{max_pool}, op::ParameterVector{input}); + + { + pass::Manager pass_manager; + pass_manager.register_pass("max_pool_fprop_before.pdf"); + pass_manager.run_passes(f); + } + + { + NodeVector nv_cwi; + pass::Manager pass_manager; + pass_manager.register_pass("max_pool_bprop_before.pdf"); + pass_manager.register_pass(nv_cwi); + pass_manager.register_pass("max_pool_bprop_after.pdf"); + pass_manager.run_passes(df); + } + + { + pass::Manager pass_manager; + pass_manager.register_pass("max_pool_fprop_after.pdf"); + pass_manager.run_passes(f); + } + + auto maxpool_goe_output = + std::dynamic_pointer_cast(f->get_results().at(0)->get_argument(0)); + ASSERT_TRUE(maxpool_goe_output); + ASSERT_EQ(maxpool_goe_output->get_n(), 0); + auto maxpool_with_indices = df->get_results().at(0)->get_argument(0); + auto maxpool_goe_indices = + std::dynamic_pointer_cast(maxpool_with_indices->get_argument(2)); + ASSERT_TRUE(maxpool_goe_indices); + ASSERT_EQ(maxpool_goe_indices->get_n(), 1); +} + +TEST(cpu_fusion, backwards_maxpool_with_indices_n4_c1_hw4_2x2_max) +{ + Shape shape_a{1, 4, 4, 4}; + Shape maxpool_shape{1, 4, 3, 3}; + auto A = std::make_shared(element::f32, shape_a); + Shape window_shape{2, 2}; + auto window_movement_strides = Strides{1, 1}; + auto maxpool = 
std::make_shared(A, window_shape, window_movement_strides); + auto f = std::make_shared(maxpool, op::ParameterVector{A}); + + auto backend = runtime::Backend::create("CPU"); + shared_ptr ep = backend->create_tensor(element::f32, maxpool_shape); + vector dataEp(shape_size(maxpool_shape), 4); + + shared_ptr input = backend->create_tensor(element::f32, shape_a); + shared_ptr output = backend->create_tensor(element::f32, shape_a); + + vector dataInput{11.f, 31.f, 40.f, 47.f, 13.f, 61.f, 48.f, 59.f, 17.f, 39.f, 64.f, + 62.f, 45.f, 55.f, 36.f, 19.f, 65.f, 33.f, 49.f, 30.f, 56.f, 41.f, + 53.f, 58.f, 22.f, 35.f, 52.f, 50.f, 63.f, 54.f, 12.f, 26.f, 44.f, + 21.f, 69.f, 24.f, 46.f, 25.f, 51.f, 29.f, 72.f, 15.f, 73.f, 10.f, + 16.f, 37.f, 70.f, 32.f, 28.f, 66.f, 57.f, 27.f, 60.f, 42.f, 43.f, + 71.f, 18.f, 38.f, 67.f, 68.f, 14.f, 20.f, 34.f, 23.f}; + + vector expected{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 12.0f, 0.0f, 4.0f, 0.0f, 0.0f, 16.0f, + 0.0f, 0.0f, 4.0f, 0.0f, 0.0f, 4.0f, 0.0f, 0.0f, 0.0f, 4.0f, 0.0f, + 8.0f, 8.0f, 0.0f, 0.0f, 4.0f, 0.0f, 4.0f, 4.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 8.0f, 0.0f, 4.0f, 0.0f, 0.0f, 0.0f, 8.0f, 0.0f, 16.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 8.0f, 0.0f, 0.0f, 4.0f, 0.0f, 0.0f, + 8.0f, 0.0f, 4.0f, 8.0f, 4.0f, 0.0f, 0.0f, 0.0f, 0.0f}; + + copy_data(ep, dataEp); + copy_data(input, dataInput); + + auto C = std::make_shared(element::f32, maxpool_shape); + auto df = autodiff::backprop_function(f); + + { + NodeVector nv_cwi; + pass::Manager pass_manager; + pass_manager.register_pass("max_pool_bprop_before2.pdf"); + pass_manager.register_pass(nv_cwi); + pass_manager.register_pass("max_pool_bprop_after2.pdf"); + pass_manager.run_passes(df); + } + + backend->call_with_validate(df, {output}, {input, ep}); + ASSERT_TRUE(read_vector(output) == expected); +} + +#if 0 +TEST(cpu_fusion, loop_kernel_one_input_one_output) +{ + Shape shapeA{2, 2}; + auto A = make_shared(element::i32, shapeA); + auto neg_a = make_shared(A); + auto lk = make_shared( + NodeVector{neg_a}, 
NodeVector{neg_a}, NodeVector{A}); + auto f = make_shared(NodeVector{lk}, op::ParameterVector{A}); + + auto backend = runtime::Backend::create("CPU"); + shared_ptr a = backend->create_tensor(element::i32, shapeA); + shared_ptr result = backend->create_tensor(element::i32, shapeA); + + vector dataA{1, 4, 1, 4}; + copy_data(a, dataA); + vector expected{-1, -4, -1, -4}; + + backend->call_with_validate(f, {result}, {a}); + + EXPECT_EQ(read_vector(result), expected); +} + +TEST(cpu_fusion, loop_kernel_embedded_graph) +{ + Shape shapeA{2, 2}; + auto A = make_shared(element::i32, shapeA); + auto B = make_shared(element::i32, shapeA); + auto neg_a = make_shared(A); + auto neg_b = make_shared(B); + auto add = neg_a + neg_b; + auto lk = make_shared( + NodeVector{add}, NodeVector{add}, NodeVector{neg_a, neg_b}); + auto f = make_shared(NodeVector{lk}, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("CPU"); + shared_ptr a = backend->create_tensor(element::i32, shapeA); + shared_ptr b = backend->create_tensor(element::i32, shapeA); + shared_ptr result = backend->create_tensor(element::i32, shapeA); + + vector dataA{1, 4, 1, 4}; + copy_data(a, dataA); + vector dataB{1, 2, 3, 4}; + copy_data(b, dataB); + vector expected{-2, -6, -4, -8}; + backend->call_with_validate(f, {result}, {a, b}); + EXPECT_EQ(read_vector(result), expected); +} + +TEST(cpu_fusion, loop_kernel_two_inputs_one_output) +{ + Shape shapeA{2, 2}; + auto A = make_shared(element::i32, shapeA); + auto B = make_shared(element::i32, shapeA); + auto add = A + B; + auto lk = make_shared( + NodeVector{add}, NodeVector{add}, NodeVector{A, B}); + auto f = make_shared(NodeVector{lk}, op::ParameterVector{A, B}); + + auto backend = runtime::Backend::create("CPU"); + shared_ptr a = backend->create_tensor(element::i32, shapeA); + shared_ptr b = backend->create_tensor(element::i32, shapeA); + shared_ptr result = backend->create_tensor(element::i32, shapeA); + + vector dataA{1, 4, 1, 4}; + copy_data(a, 
dataA); + vector dataB{1, 2, 3, 4}; + copy_data(b, dataB); + vector expected{2, 6, 4, 8}; + + backend->call_with_validate(f, {result}, {a, b}); + + EXPECT_EQ(read_vector(result), expected); +} + +TEST(cpu_fusion, loop_kernel_multiple_outputs) +{ + Shape shapeA{2, 2}; + auto A = make_shared(element::i32, shapeA); + auto B = make_shared(element::i32, shapeA); + auto C = make_shared(element::i32, shapeA); + auto D = make_shared(element::i32, shapeA); + + auto neg_a = make_shared(A); + auto neg_b = make_shared(B); + auto add_ab = neg_a + neg_b; + auto add_cd = C + B; + auto add_cd_abs = make_shared(add_cd); + auto add_ab_abs = make_shared(add_ab); + auto add_aab = add_ab_abs + A; + auto add_cdd = add_cd_abs + D; + + auto lk = make_shared( + NodeVector{neg_a, neg_b, add_ab, add_cd, add_cd_abs, add_ab_abs, add_aab, add_cdd}, + NodeVector{add_aab, add_cdd, neg_b}, + NodeVector{A, B, C, D}); + auto add_aab_goe = std::make_shared(lk, 0); + auto add_cdd_goe = std::make_shared(lk, 1); + auto neg_b_goe = std::make_shared(lk, 2); + + auto f = make_shared(NodeVector{add_aab_goe, add_cdd_goe, neg_b_goe}, + op::ParameterVector{A, B, C, D}); + + auto backend = runtime::Backend::create("CPU"); + + shared_ptr a = backend->create_tensor(element::i32, shapeA); + shared_ptr b = backend->create_tensor(element::i32, shapeA); + shared_ptr c = backend->create_tensor(element::i32, shapeA); + shared_ptr d = backend->create_tensor(element::i32, shapeA); + shared_ptr r1 = backend->create_tensor(element::i32, shapeA); + shared_ptr r2 = backend->create_tensor(element::i32, shapeA); + shared_ptr r3 = backend->create_tensor(element::i32, shapeA); + + vector dataA{1, 4, 1, 4}; + vector dataB{3, 3, 3, 9}; + vector dataC{1, 2, 3, 4}; + vector dataD{-2, 2, -1, 1}; + copy_data(a, dataA); + copy_data(b, dataB); + copy_data(c, dataC); + copy_data(d, dataD); + + backend->call_with_validate(f, {r1, r2, r3}, {a, b, c, d}); + + vector expected1{5, 11, 5, 17}; + vector expected2{2, 7, 5, 14}; + vector 
expected3{-3, -3, -3, -9}; + EXPECT_EQ(read_vector(r1), expected1); + EXPECT_EQ(read_vector(r2), expected2); + EXPECT_EQ(read_vector(r3), expected3); +} + +TEST(cpu_fusion, loop_kernel_copy_with_new_args) +{ + Shape shapeA{2, 2}; + auto A = make_shared(element::i32, shapeA); + auto B = make_shared(element::i32, shapeA); + auto C = make_shared(element::i32, shapeA); + auto D = make_shared(element::i32, shapeA); + + auto neg_a = make_shared(A); + auto neg_b = make_shared(B); + auto add_ab = neg_a + neg_b; + auto add_cd = C + B; + auto add_cd_abs = make_shared(add_cd); + auto add_ab_abs = make_shared(add_ab); + auto add_aab = add_ab_abs + A; + auto add_cdd = add_cd_abs + D; + + auto lk = make_shared( + NodeVector{neg_a, neg_b, add_ab, add_cd, add_cd_abs, add_ab_abs, add_aab, add_cdd}, + NodeVector{add_aab, add_cdd, neg_b}, + NodeVector{A, B, C, D}); + auto add_aab_goe = std::make_shared(lk, 0); + auto add_cdd_goe = std::make_shared(lk, 1); + auto neg_b_goe = std::make_shared(lk, 2); + + auto f = make_shared(NodeVector{add_aab_goe, add_cdd_goe, neg_b_goe}, + op::ParameterVector{A, B, C, D}); + + auto copy_f = clone_function(*f); + + auto backend = runtime::Backend::create("CPU"); + + shared_ptr a = backend->create_tensor(element::i32, shapeA); + shared_ptr b = backend->create_tensor(element::i32, shapeA); + shared_ptr c = backend->create_tensor(element::i32, shapeA); + shared_ptr d = backend->create_tensor(element::i32, shapeA); + shared_ptr r1 = backend->create_tensor(element::i32, shapeA); + shared_ptr r2 = backend->create_tensor(element::i32, shapeA); + shared_ptr r3 = backend->create_tensor(element::i32, shapeA); + shared_ptr copy_r1 = backend->create_tensor(element::i32, shapeA); + shared_ptr copy_r2 = backend->create_tensor(element::i32, shapeA); + shared_ptr copy_r3 = backend->create_tensor(element::i32, shapeA); + + vector dataA{1, 4, 1, 4}; + vector dataB{3, 3, 3, 9}; + vector dataC{1, 2, 3, 4}; + vector dataD{-2, 2, -1, 1}; + copy_data(a, dataA); + 
copy_data(b, dataB); + copy_data(c, dataC); + copy_data(d, dataD); + + backend->call_with_validate(f, {r1, r2, r3}, {a, b, c, d}); + backend->call_with_validate(copy_f, {copy_r1, copy_r2, copy_r3}, {a, b, c, d}); + + EXPECT_EQ(read_vector(r1), read_vector(copy_r1)); + EXPECT_EQ(read_vector(r2), read_vector(copy_r2)); + EXPECT_EQ(read_vector(r3), read_vector(copy_r3)); +} + +#endif + +static std::shared_ptr make_forward_function() +{ + Shape shape_a{10, 3, 28, 28}; + auto input = std::make_shared(element::f32, shape_a); + Shape window_shape{2, 2}; + auto max_pool = std::make_shared(input, window_shape); + auto neg = std::make_shared(max_pool); + auto absn = std::make_shared(max_pool); + return std::make_shared(NodeVector{max_pool, neg, absn}, op::ParameterVector{input}); +} + +static std::pair, std::vector>> + make_backward_function(std::shared_ptr f) +{ + // get parameters + std::vector> back_parameters = f->get_parameters(); + + ngraph::NodeVector adjoints; + ngraph::NodeVector outputs; + for (auto Y : f->get_results()) + { + // Get the output + // Create the Adjoint + auto C = std::make_shared(Y->get_element_type(), Y->get_shape()); + outputs.push_back(Y); + adjoints.push_back(C); + } + + ngraph::autodiff::Adjoints adjoint{outputs, adjoints}; + + // Perform autodiff + std::vector> dYdXs(back_parameters.size()); + transform(back_parameters.begin(), + back_parameters.end(), + dYdXs.begin(), + [&adjoint](const std::shared_ptr& X) { return adjoint.backprop_node(X); }); + + // create the backward function + std::vector> param_adjoints; + for (auto n : adjoints) + param_adjoints.push_back(std::dynamic_pointer_cast(n)); + back_parameters.insert(back_parameters.begin(), param_adjoints.begin(), param_adjoints.end()); + + return {std::make_shared(dYdXs, back_parameters), adjoints}; +} + +void optimize_graph(std::shared_ptr& f, std::shared_ptr bf) +{ + // start by removing excess reshapes + NodeVector nv_cwi; + ngraph::pass::Manager pass_manager; + 
pass_manager.register_pass(); + pass_manager.register_pass(); + pass_manager.register_pass(nv_cwi); + pass_manager.register_pass("before.fprop_cache.pdf"); + + pass_manager.run_passes(f); + pass_manager.run_passes(bf); + if (nv_cwi.size() > 0) + { + NodeVector new_outputs; + for (auto r : f->get_results()) + { + new_outputs.push_back(r->get_argument(0)); + } + + new_outputs.insert(new_outputs.end(), nv_cwi.begin(), nv_cwi.end()); + f = std::make_shared(new_outputs, f->get_parameters()); + } + + ngraph::NodeVector dYdXs; + for (size_t i = 0; i < bf->get_output_size(); ++i) + { + dYdXs.push_back(bf->get_output_op(i)->get_argument(0)); + } + + ngraph::NodeVector combined_outputs; + for (auto r : f->get_results()) + { + combined_outputs.push_back(r->get_argument(0)); + } + + combined_outputs.insert(combined_outputs.end(), dYdXs.begin(), dYdXs.end()); + + std::vector> combined_parameters = f->get_parameters(); + std::vector> back_parameters = bf->get_parameters(); + + combined_parameters.insert( + combined_parameters.end(), back_parameters.begin(), back_parameters.end()); + auto combinedf = std::make_shared(combined_outputs, combined_parameters); + // rerun Reshape elimination to help simplify the graph again, run CPUFusion + // this replaces nodes in both f and bf due to shared-ptr - ness + ngraph::pass::Manager pass_manager_comb; + pass_manager_comb.register_pass(); + pass_manager_comb.register_pass(); + pass_manager_comb.run_passes(combinedf); +} + +TEST(cpu_fusion, maxpool_with_indices_in_mxnet) +{ + auto f = make_forward_function(); + auto bfa = make_backward_function(f); + auto maybe_bf = bfa.first; + auto adjoints = bfa.second; + optimize_graph(f, maybe_bf); + auto fprop_cache = ngraph::cache_fprop(f, maybe_bf); + + auto mpwi_bprop = fprop_cache.bprop->get_results().at(0)->get_argument(0); + ASSERT_TRUE(std::dynamic_pointer_cast(mpwi_bprop->get_argument(0))); + ASSERT_TRUE(std::dynamic_pointer_cast(mpwi_bprop->get_argument(2))); +} + +TEST(cpu_fusion, 
conv_batch_norm_folding) +{ + Shape shape_input{1, 8, 3, 3}; + Shape shape_weights{2, 8, 1, 1}; + Shape shape_norm{2}; + + auto make_function = [shape_input, shape_weights, shape_norm]() { + auto input = std::make_shared(element::f32, shape_input); + auto weights = std::make_shared(element::f32, shape_weights); + double eps = 0.001; + auto gamma = std::make_shared(element::f32, shape_norm); + auto beta = std::make_shared(element::f32, shape_norm); + auto mean = std::make_shared(element::f32, shape_norm); + auto var = std::make_shared(element::f32, shape_norm); + auto conv = std::make_shared(input, weights, Strides{1, 1}, Strides{1, 1}); + auto bn = std::make_shared(conv, gamma, beta, mean, var, eps); + auto f = make_shared(NodeVector{bn}, + op::ParameterVector{input, weights, gamma, beta, mean, var}); + return f; + }; + + auto int_f = make_function(); + auto cpu_f = make_function(); + + vector> args{ + {1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f, 7.25f, 8.25f, -1.25f, + -1.25f, 1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f, -4.25f, 2.25f, + 4.25f, 4.25f, 0.f, 0.f, -1.f, 0.f, 2.f, 2.f, 0.f, 0.f, 0.f, + 0.f, 2.f, 2.f, 1.25f, 2.25f, 5.25f, 6.25f, 1.25f, 1.25f, 3.25f, 4.25f, + -7.25f, 8.25f, 1.25f, -1.25f, -1.25f, 2.25f, 3.25f, 2.25f, -4.25f, -4.25f, -1.25f, + -2.25f, 4.25f, 2.25f, 4.25f, 4.25f, 0.f, 0.f, 1.f, 0.f, -2.f, 2.f, + 0.f, 0.f, 0.f, 0.f, -2.f, -2.f}, + {1.25f, + 2.25f, + 5.25f, + 6.25f, + -1.25f, + -1.25f, + 3.25f, + -4.25f, + 7.25f, + 8.25f, + -1.25f, + 0.f, + 0.f, + 0.f, + 0.f, + -2.f}, + {-0.9384f, 0.01875f}, + {11.0f, 1.3f}, + {0.12f, 0.31f}, + {0.01f, 0.11f}, + }; + + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); +} + +TEST(cpu_fusion, convbias_batch_norm_folding) +{ + Shape shape_input{2, 8, 5, 5}; + Shape shape_weights{2, 8, 2, 2}; + Shape shape_norm{2}; + + auto make_function = [shape_input, 
shape_weights, shape_norm]() { + auto input = std::make_shared(element::f32, shape_input); + auto weights = std::make_shared(element::f32, shape_weights); + auto bias = std::make_shared(element::f32, Shape{2}); + double eps = 1.01; + auto gamma = std::make_shared(element::f32, shape_norm); + auto beta = std::make_shared(element::f32, shape_norm); + auto mean = std::make_shared(element::f32, shape_norm); + auto var = std::make_shared(element::f32, shape_norm); + auto conv = std::make_shared(input, weights, Strides{1, 1}, Strides{1, 1}); + auto convbias = + conv + std::make_shared(bias, conv->get_shape(), AxisSet{0, 2, 3}); + auto bn = std::make_shared(convbias, gamma, beta, mean, var, eps); + auto f = make_shared( + NodeVector{bn}, op::ParameterVector{input, weights, bias, gamma, beta, mean, var}); + return f; + }; + + auto int_f = make_function(); + auto cpu_f = make_function(); + + test::Uniform rng(1.0f, 100.0f); + vector> args; + for (shared_ptr param : cpu_f->get_parameters()) + { + vector tensor_val(shape_size(param->get_shape())); + rng.initialize(tensor_val); + args.push_back(tensor_val); + } + + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); +} + +TEST(cpu_fusion, conv_affine_folding) +{ + Shape shape_input{1, 8, 3, 3}; + Shape shape_weights{2, 8, 1, 1}; + Shape shape_norm{2}; + + auto make_function = [shape_input, shape_weights, shape_norm]() { + auto input = std::make_shared(element::f32, shape_input); + auto weights = std::make_shared(element::f32, shape_weights); + + auto a = std::make_shared(element::f32, shape_norm); + auto b = std::make_shared(element::f32, shape_norm); + auto conv = std::make_shared(input, weights, Strides{1, 1}, Strides{1, 1}); + auto out = std::make_shared( + std::make_shared( + conv, std::make_shared(a, conv->get_shape(), AxisSet{0, 2, 3})), + std::make_shared(b, conv->get_shape(), AxisSet{0, 2, 3})); + 
auto f = make_shared(NodeVector{out}, op::ParameterVector{input, weights, a, b}); + return f; + }; + + auto int_f = make_function(); + auto cpu_f = make_function(); + + vector> args{ + {1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f, 7.25f, 8.25f, -1.25f, + -1.25f, 1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f, -4.25f, 2.25f, + 4.25f, 4.25f, 0.f, 0.f, -1.f, 0.f, 2.f, 2.f, 0.f, 0.f, 0.f, + 0.f, 2.f, 2.f, 1.25f, 2.25f, 5.25f, 6.25f, 1.25f, 1.25f, 3.25f, 4.25f, + -7.25f, 8.25f, 1.25f, -1.25f, -1.25f, 2.25f, 3.25f, 2.25f, -4.25f, -4.25f, -1.25f, + -2.25f, 4.25f, 2.25f, 4.25f, 4.25f, 0.f, 0.f, 1.f, 0.f, -2.f, 2.f, + 0.f, 0.f, 0.f, 0.f, -2.f, -2.f}, + {1.25f, + 2.25f, + 5.25f, + 6.25f, + -1.25f, + -1.25f, + 3.25f, + -4.25f, + 7.25f, + 8.25f, + -1.25f, + 0.f, + 0.f, + 0.f, + 0.f, + -2.f}, + {-0.9384f, 0.01875f}, + {11.0f, 1.3f}, + }; + + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); +} + +TEST(cpu_fusion, convbias_affine_folding) +{ + Shape shape_input{1, 6, 3, 3}; + Shape shape_weights{3, 6, 1, 1}; + Shape shape_norm{3}; + + auto make_function = [shape_input, shape_weights, shape_norm]() { + auto input = std::make_shared(element::f32, shape_input); + auto weights = std::make_shared(element::f32, shape_weights); + auto bias = std::make_shared(element::f32, Shape{3}); + + auto a = std::make_shared(element::f32, shape_norm); + auto b = std::make_shared(element::f32, shape_norm); + auto conv = std::make_shared(input, weights, Strides{1, 1}, Strides{1, 1}); + auto convbias = + conv + std::make_shared(bias, conv->get_shape(), AxisSet{0, 2, 3}); + auto out = std::make_shared( + std::make_shared( + convbias, std::make_shared(a, conv->get_shape(), AxisSet{0, 2, 3})), + std::make_shared(b, conv->get_shape(), AxisSet{0, 2, 3})); + auto f = + make_shared(NodeVector{out}, op::ParameterVector{input, weights, bias, a, b}); + return f; + 
}; + + auto int_f = make_function(); + auto cpu_f = make_function(); + + test::Uniform rng(20.0f, 300.0f); + vector> args; + for (shared_ptr param : cpu_f->get_parameters()) + { + vector tensor_val(shape_size(param->get_shape())); + rng.initialize(tensor_val); + args.push_back(tensor_val); + } + + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); +} + +TEST(cpu_fusion, group_convolution_fusion) +{ + Shape shape_a{1, 32, 2, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{2, 16, 1, 1}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{1, 2, 2, 2}; + + auto a_slice0 = std::make_shared(A, Coordinate{0, 0, 0, 0}, Coordinate{1, 16, 2, 2}); + auto a_slice1 = + std::make_shared(A, Coordinate{0, 16, 0, 0}, Coordinate{1, 32, 2, 2}); + + auto b_slice0 = std::make_shared(B, Coordinate{0, 0, 0, 0}, Coordinate{1, 16, 1, 1}); + auto b_slice1 = std::make_shared(B, Coordinate{1, 0, 0, 0}, Coordinate{2, 16, 1, 1}); + + auto conv_lower = make_shared(a_slice0, + b_slice0, + Strides{1, 1}, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + + auto conv_upper = make_shared(a_slice1, + b_slice1, + Strides{1, 1}, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + + auto concat = make_shared(NodeVector{conv_lower, conv_upper}, 1); + + auto f = make_shared(NodeVector{concat}, op::ParameterVector{A, B}); + pass::Manager pass_manager; + pass_manager.register_pass("before_group.pdf"); + pass_manager.register_pass(); + pass_manager.register_pass("after_group.pdf"); + pass_manager.run_passes(f); + auto gc = + std::dynamic_pointer_cast(f->get_results().at(0)->get_argument(0)); + ASSERT_TRUE(gc); +} + +TEST(cpu_fusion, group_convolution) +{ + auto backend = runtime::Backend::create("CPU"); + test::Uniform rng(2.0f, 10.0f); + + const size_t GROUPS = 2; + Shape shape_a{1, 32, 2, 
2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_b{2, 16, 1, 1}; + auto B = make_shared(element::f32, shape_b); + Shape shape_r{1, 2, 2, 2}; + auto group_conv = make_shared(A, + B, + Strides{1, 1}, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}, + GROUPS, + shape_r); + + Shape shape_c{1, 16, 2, 2}; + auto C = make_shared(element::f32, shape_c); + Shape shape_d{1, 16, 1, 1}; + auto D = make_shared(element::f32, shape_d); + auto conv_lower = make_shared(C, + D, + Strides{1, 1}, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + + auto E = make_shared(element::f32, shape_c); + auto F = make_shared(element::f32, shape_d); + auto conv_upper = make_shared(E, + F, + Strides{1, 1}, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + + auto f = make_shared(NodeVector{group_conv, conv_lower, conv_upper}, + op::ParameterVector{A, B, C, D, E, F}); + + auto a_ = rng.initialize(backend->create_tensor(element::f32, shape_a)); + auto b_ = rng.initialize(backend->create_tensor(element::f32, shape_b)); + + vector rv(shape_size(shape_r), 0); + auto group_result = std::dynamic_pointer_cast( + backend->create_tensor(element::f32, shape_r, rv.data())); + + auto av = read_vector(a_); + auto bv = read_vector(b_); + auto c_ = backend->create_tensor(element::f32, shape_c, av.data()); // lower data + auto d_ = backend->create_tensor(element::f32, shape_d, bv.data()); // upper data + + auto e_ = + backend->create_tensor(element::f32, shape_c, av.data() + av.size() / 2); // lower weights + auto f_ = + backend->create_tensor(element::f32, shape_d, bv.data() + bv.size() / 2); // upper weights + + Shape shape_ur{1, 1, 2, 2}; + // allocate a contigious storage for both lower and upper halves. 
+ vector erv(shape_size(shape_r), 0); + auto lower_result = std::dynamic_pointer_cast( + backend->create_tensor(element::f32, shape_ur, erv.data())); + auto upper_result = std::dynamic_pointer_cast( + backend->create_tensor(element::f32, shape_ur, erv.data() + erv.size() / 2)); + backend->call_with_validate( + f, {group_result, lower_result, upper_result}, {a_, b_, c_, d_, e_, f_}); + ASSERT_EQ(rv, erv); +} + +//TODO(Pruthvi) enable this test after MKLDNN RNN bug is fixed +#if 0 +TEST(cpu_fusion, rnn_fprop_1_lstm_cell) +{ + auto src_layer = make_shared(element::f32, Shape{10, 100}); + auto src_iter = make_shared(element::f32, Shape{20, 100}); + auto weights_layer = make_shared(element::f32, Shape{400, 100}); + auto weights_iter = make_shared(element::f32, Shape{400, 100}); + auto biases = make_shared(element::f32, Shape{400}); + const int number_of_timesteps = 1; + const int number_of_gates_per_cell = 4; + const int src_seq_length = 1; + const int src_layer_feature_size = 100; + const int feature_size = 100; + const int num_rnn_cell_states = 2; + const int rnn_direction = 1; + const int num_of_rnn_fused_layer = 1; + auto rnn_node = make_shared(src_layer, + src_iter, + weights_layer, + weights_iter, + biases, + number_of_timesteps, + number_of_gates_per_cell, + src_seq_length, + src_layer_feature_size, + feature_size, + num_rnn_cell_states, + rnn_direction, + num_of_rnn_fused_layer); + auto rnn_ht_output = make_shared(rnn_node, 0); + auto rnn_ct_output = make_shared(rnn_node, 1); + + auto func = make_shared( + NodeVector{rnn_ht_output, rnn_ct_output}, + op::ParameterVector{src_layer, src_iter, weights_layer, weights_iter, biases}); + auto backend = runtime::Backend::create("CPU"); + + shared_ptr src_layer_t = + backend->create_tensor(element::f32, src_layer->get_shape()); + shared_ptr src_iter_t = + backend->create_tensor(element::f32, src_iter->get_shape()); + shared_ptr weights_layer_t = + backend->create_tensor(element::f32, weights_layer->get_shape()); + 
shared_ptr weights_iter_t = + backend->create_tensor(element::f32, weights_iter->get_shape()); + shared_ptr biases_t = + backend->create_tensor(element::f32, biases->get_shape()); + shared_ptr result_ht = backend->create_tensor(element::f32, {10, 100}); + shared_ptr result_ct = + backend->create_tensor(element::f32, Shape{20, 100}); + + copy_data(src_layer_t, vector(1000, 1)); + copy_data(src_iter_t, vector(2000, 1)); + copy_data(weights_layer_t, vector(400 * 100, 1)); + copy_data(weights_iter_t, vector(400 * 100, 1)); + copy_data(biases_t, vector(400, 1)); + + backend->call_with_validate( + func, + {result_ht, result_ct}, + {src_layer_t, src_iter_t, weights_layer_t, weights_iter_t, biases_t}); + vector expected_ht(10 * 100, 0.964028f); + vector expected_ct; + for (size_t i = 0; i < 20 * 100; i++) + { + if (i < 1000) + { + expected_ct.push_back(0.964028f); + } + else + { + expected_ct.push_back(2.0f); + } + } + + EXPECT_TRUE(test::all_close(expected_ht, read_vector(result_ht))); + EXPECT_TRUE(test::all_close(expected_ct, read_vector(result_ct))); +} +#endif + +TEST(cpu_fusion, fuse_lstm_cells) +{ + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass(); + const string json_path = + file_util::path_join(SERIALIZED_ZOO, "mxnet/2rnn_layer_3lstm_cell.json"); + const string json_string = file_util::read_file_to_string(json_path); + stringstream ss(json_string); + shared_ptr func = ngraph::deserialize(ss); + pass_manager.run_passes(func); + auto lstm_ops = get_ops_of_type(func); + EXPECT_EQ(lstm_ops.size(), 6); +} + +TEST(cpu_fusion, fuse_2_layer_rnn) +{ + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass(); + const string json_path = + file_util::path_join(SERIALIZED_ZOO, "mxnet/2rnn_layer_3lstm_cell.json"); + const string json_string = file_util::read_file_to_string(json_path); + stringstream ss(json_string); + shared_ptr func = ngraph::deserialize(ss); + pass_manager.run_passes(func); + size_t 
count = count_ops_of_type(func); + auto rnn_ops = get_ops_of_type(func); + EXPECT_EQ(rnn_ops.size(), count); + for (auto& node : rnn_ops) + { + EXPECT_EQ(node->get_num_timesteps(), node->get_src_sequence_length()); + EXPECT_EQ(node->get_num_cell_states(), node->get_argument(1)->get_arguments().size()); + } +} + +TEST(cpu_fusion, fuse_1_layer_rnn) +{ + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass(); + const string json_path = + file_util::path_join(SERIALIZED_ZOO, "mxnet/1rnn_layer_3lstm_cell.json"); + const string json_string = file_util::read_file_to_string(json_path); + stringstream ss(json_string); + shared_ptr func = ngraph::deserialize(ss); + pass_manager.run_passes(func); + size_t count = count_ops_of_type(func); + auto rnn_ops = get_ops_of_type(func); + EXPECT_EQ(rnn_ops.size(), 1); + EXPECT_EQ(rnn_ops.size(), count); + for (auto& node : rnn_ops) + { + EXPECT_EQ(node->get_num_timesteps(), node->get_src_sequence_length()); + EXPECT_EQ(node->get_num_cell_states(), node->get_argument(1)->get_arguments().size()); + } +} + +static std::shared_ptr make_function(const std::string& file_name) +{ + const string json_path = file_util::path_join(SERIALIZED_ZOO, file_name); + const string json_string = file_util::read_file_to_string(json_path); + stringstream ss(json_string); + shared_ptr func = ngraph::deserialize(ss); + return func; +} + +TEST(cpu_fusion, rnn_fusion_inter_vs_cpu_1lstm_cell) +{ + const std::string file_name("mxnet/1_lstm_cell_forward.json"); + auto cpu_f = make_function(file_name); + auto int_f = make_function(file_name); + test::Uniform rng(0.0f, 1.0f); + vector> args; + + for (shared_ptr param : int_f->get_parameters()) + { + vector tensor_val(shape_size(param->get_shape())); + rng.initialize(tensor_val); + args.push_back(tensor_val); + } + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + for (size_t i = 0; i < cpu_results.size(); i++) + { + 
EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); + } +} + +TEST(cpu_fusion, rnn_fusion_inter_vs_cpu_1rnn_layer_3lstm_cell) +{ + const std::string file_name("mxnet/1rnn_layer_3lstm_cell.json"); + auto cpu_f = make_function(file_name); + auto int_f = make_function(file_name); + test::Uniform rng(0.0f, 1.0f); + vector> args; + + for (shared_ptr param : int_f->get_parameters()) + { + vector tensor_val(shape_size(param->get_shape())); + rng.initialize(tensor_val); + args.push_back(tensor_val); + } + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + for (size_t i = 0; i < cpu_results.size(); i++) + { + EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); + } +} + +TEST(cpu_fusion, rnn_fusion_inter_vs_cpu_2rnn_layer_3lstm_cell) +{ + const std::string file_name("mxnet/2rnn_layer_3lstm_cell.json"); + auto cpu_f = make_function(file_name); + auto int_f = make_function(file_name); + test::Uniform rng(0.0f, 1.0f); + vector> args; + + for (shared_ptr param : int_f->get_parameters()) + { + vector tensor_val(shape_size(param->get_shape())); + rng.initialize(tensor_val); + args.push_back(tensor_val); + } + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + for (size_t i = 0; i < cpu_results.size(); i++) + { + EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); + } +} + +#if 0 + +TEST(cpu_fusion, loop_kernel_fusion_multiple_groups_pruned) +{ + auto make_function = []() -> std::shared_ptr { + Shape shape{}; + auto a = make_shared(element::f32, shape); + auto b = make_shared(element::f32, shape); + auto c = make_shared(element::f32, shape); + auto add_ab = a + b; + auto add_abs = std::make_shared(add_ab); + auto abs_neg = std::make_shared(add_abs); + auto sub_c_neg = c - abs_neg; + + auto d = make_shared(element::f32, shape); + auto d_abs = std::make_shared(d); + auto 
add_d = d_abs + add_ab; + auto neg_d = std::make_shared(add_d); + + auto mul_cd = neg_d * sub_c_neg; + auto f = + std::make_shared(ngraph::NodeVector{mul_cd}, op::ParameterVector{a, b, c, d}); + + return f; + }; + + pass::Manager pass_manager; + pass_manager.register_pass(3); + auto cpu_f = make_function(); + auto int_f = make_function(); + pass_manager.run_passes(cpu_f); + test::Uniform rng(-100.0f, 100.0f); + vector> args; + + size_t lkn = count_ops_of_type(cpu_f); + ASSERT_GT(lkn, 0); + + for (shared_ptr param : cpu_f->get_parameters()) + { + vector tensor_val(shape_size(param->get_shape())); + rng.initialize(tensor_val); + args.push_back(tensor_val); + } + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + for (size_t i = 0; i < cpu_results.size(); i++) + { + EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); + } +} + +TEST(cpu_fusion, loop_kernel_fusion_bounded_relu) +{ + auto make_function = []() -> std::shared_ptr { + Shape shape{}; + auto a = make_shared(element::f32, shape); + auto relu = make_shared(a); + auto upper_bound = + op::Constant::create(element::f32, shape, std::vector{6.0f}); + auto minn = make_shared(relu, upper_bound); + auto absn = make_shared(minn); + auto negn = std::make_shared(absn); + + auto f = std::make_shared(ngraph::NodeVector{negn}, op::ParameterVector{a}); + + return f; + }; + + pass::Manager pass_manager; + pass_manager.register_pass("before_relu_fusion.pdf"); + pass_manager.register_pass(3); + pass_manager.register_pass("after_relu_fusion.pdf"); + auto cpu_f = make_function(); + auto int_f = make_function(); + pass_manager.run_passes(cpu_f); + test::Uniform rng(-100.0f, 100.0f); + vector> args; + + size_t lkn = count_ops_of_type(cpu_f); + ASSERT_GT(lkn, 0); + + for (shared_ptr param : cpu_f->get_parameters()) + { + vector tensor_val(shape_size(param->get_shape())); + rng.initialize(tensor_val); + args.push_back(tensor_val); + } + auto 
int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + for (size_t i = 0; i < cpu_results.size(); i++) + { + EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); + } +} + +TEST(cpu_fusion, loop_kernel_fusion_multiple_groups) +{ + auto make_function = []() -> std::shared_ptr { + Shape shape{}; + auto a = make_shared(element::f32, shape); + auto b = make_shared(element::f32, shape); + auto c = make_shared(element::f32, shape); + auto add_ab = a + b; + auto add_abs = std::make_shared(add_ab); + auto abs_neg = std::make_shared(add_abs); + auto sub_c_neg = c - abs_neg; + + auto d = make_shared(element::f32, shape); + auto d_abs = std::make_shared(d); + auto add_d = d_abs + add_ab; + auto neg_d = std::make_shared(add_d); + + auto mul_cd = neg_d * sub_c_neg; + auto f = + std::make_shared(ngraph::NodeVector{mul_cd}, op::ParameterVector{a, b, c, d}); + + return f; + }; + + pass::Manager pass_manager; + pass_manager.register_pass(2); + auto cpu_f = make_function(); + auto int_f = make_function(); + pass_manager.run_passes(cpu_f); + test::Uniform rng(-100.0f, 100.0f); + vector> args; + + size_t lkn = count_ops_of_type(cpu_f); + ASSERT_GT(lkn, 0); + + for (shared_ptr param : cpu_f->get_parameters()) + { + vector tensor_val(shape_size(param->get_shape())); + rng.initialize(tensor_val); + args.push_back(tensor_val); + } + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + for (size_t i = 0; i < cpu_results.size(); i++) + { + EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); + } +} + +TEST(cpu_fusion, loop_kernel_fusion_one_group) +{ + auto make_function = []() -> std::shared_ptr { + Shape shape{}; + auto a = make_shared(element::f32, shape); + auto b = make_shared(element::f32, shape); + auto c = make_shared(element::f32, shape); + auto add_ab = a + b; + auto add_abs = std::make_shared(add_ab); + auto 
abs_neg = std::make_shared(add_abs); + auto sub_c_neg = c - abs_neg; + auto d = make_shared(element::f32, shape); + auto add_d = sub_c_neg + d; + auto abs_add_d = std::make_shared(add_d); + auto e = make_shared(element::f32, shape); + auto add_e = e + abs_add_d; + auto neg_e = std::make_shared(add_e); + + auto f = std::make_shared(ngraph::NodeVector{neg_e}, + op::ParameterVector{a, b, c, d, e}); + + return f; + + }; + + pass::Manager pass_manager; + pass_manager.register_pass(2); + auto cpu_f = make_function(); + auto int_f = make_function(); + pass_manager.run_passes(cpu_f); + test::Uniform rng(-100.0f, 100.0f); + vector> args; + + size_t lkn = count_ops_of_type(cpu_f); + ASSERT_GT(lkn, 0); + + for (shared_ptr param : cpu_f->get_parameters()) + { + vector tensor_val(shape_size(param->get_shape())); + rng.initialize(tensor_val); + args.push_back(tensor_val); + } + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + for (size_t i = 0; i < cpu_results.size(); i++) + { + EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); + } +} + +#endif + +TEST(cpu_fusion, sigmoid_multiply_fusion) +{ + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass(); + const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/3_lstm_cell_forward.json"); + const string json_string = file_util::read_file_to_string(json_path); + stringstream ss(json_string); + shared_ptr func = ngraph::deserialize(ss); + pass_manager.run_passes(func); + size_t ccg = count_ops_of_type(func); + ASSERT_EQ(ccg, 18); +} + +void sigmoid_multiply_fusion_forward_compute(runtime::Backend* backend, + const op::ParameterVector& input_params, + const vector>& input_data, + const vector& input_shapes, + const Shape& result_shape, + shared_ptr input_0_node, + shared_ptr input_1_node, + const vector& expected) +{ + shared_ptr result_tensor = backend->create_tensor(element::f32, result_shape); 
+ + vector> input_tensors; + for (int i = 0; i < input_params.size(); ++i) + { + input_tensors.push_back(backend->create_tensor(element::f32, input_shapes[i])); + copy_data(input_tensors[i], input_data[i]); + } + + auto mul_node = input_0_node * input_1_node; + auto func = make_shared(mul_node, input_params); + backend->call_with_validate(func, {result_tensor}, input_tensors); + EXPECT_TRUE(test::all_close(read_vector(result_tensor), expected)); +} + +TEST(cpu_fusion, sigmoid_multiply_fusion_forward) +{ + auto backend = runtime::Backend::create("CPU"); + + Shape data_shape{1, 1, 2, 2}; + Shape const_shape{1}; + + vector input_0_data{1.f, 2.f, 3.f, 4.f}; + vector input_1_data{1.2f, 2.3f, 3.5f, 4.7f}; + vector const_data{1.2f}; + { + auto input_0_param = make_shared(element::f32, data_shape); + auto input_1_param = make_shared(element::f32, data_shape); + auto input_2_param = make_shared(element::f32, data_shape); + auto sigmoid_0 = make_shared(input_0_param); + auto sigmoid_1 = make_shared(input_1_param, input_2_param); + vector expected{1.60833f, 3.78743f, 6.19173f, 8.54352f}; + op::ParameterVector input_params{input_0_param, input_1_param, input_2_param}; + vector> input_data{input_0_data, input_0_data, input_1_data}; + vector input_shapes{data_shape, data_shape, data_shape}; + sigmoid_multiply_fusion_forward_compute(backend.get(), + input_params, + input_data, + input_shapes, + data_shape, + sigmoid_0, + sigmoid_1, + expected); + } + { + auto input_0_param = make_shared(element::f32, data_shape); + auto input_1_param = make_shared(element::f32, const_shape); + auto sigmoid_0 = make_shared(input_1_param, data_shape, AxisSet{1, 2, 3}); + auto sigmoid_1 = make_shared(input_0_param); + vector expected{0.87727f, 1.05696f, 1.14309f, 1.17842f}; + op::ParameterVector input_params{input_0_param, input_1_param}; + vector> input_data{input_0_data, const_data}; + vector input_shapes{data_shape, const_shape}; + sigmoid_multiply_fusion_forward_compute(backend.get(), + 
input_params, + input_data, + input_shapes, + data_shape, + sigmoid_0, + sigmoid_1, + expected); + } + { + auto input_0_param = make_shared(element::f32, data_shape); + auto input_1_param = make_shared(element::f32, const_shape); + auto sigmoid_0 = make_shared(input_0_param); + auto sigmoid_1 = make_shared(input_1_param, data_shape, AxisSet{1, 2, 3}); + vector expected{0.87727f, 1.05696f, 1.14309f, 1.17842f}; + op::ParameterVector input_params{input_0_param, input_1_param}; + vector> input_data{input_0_data, const_data}; + vector input_shapes{data_shape, const_shape}; + sigmoid_multiply_fusion_forward_compute(backend.get(), + input_params, + input_data, + input_shapes, + data_shape, + sigmoid_0, + sigmoid_1, + expected); + } + { + auto input_0_param = make_shared(element::f32, data_shape); + auto input_1_param = make_shared(element::f32, data_shape); + auto sigmoid_0 = make_shared(input_0_param); + auto sigmoid_1 = make_shared(input_1_param); + vector expected{0.561837f, 0.800536f, 0.924652f, 0.973163f}; + op::ParameterVector input_params{input_0_param, input_1_param}; + vector> input_data{input_0_data, input_1_data}; + vector input_shapes{data_shape, data_shape}; + sigmoid_multiply_fusion_forward_compute(backend.get(), + input_params, + input_data, + input_shapes, + data_shape, + sigmoid_0, + sigmoid_1, + expected); + } + { + auto input_0_param = make_shared(element::f32, data_shape); + auto input_1_param = make_shared(element::f32, data_shape); + auto sigmoid_0 = make_shared(input_0_param); + auto sigmoid_1 = make_shared(input_1_param); + vector expected{0.60945f, 0.863266f, 0.950838f, 0.981851f}; + op::ParameterVector input_params{input_0_param, input_1_param}; + vector> input_data{input_0_data, input_1_data}; + vector input_shapes{data_shape, data_shape}; + sigmoid_multiply_fusion_forward_compute(backend.get(), + input_params, + input_data, + input_shapes, + data_shape, + sigmoid_0, + sigmoid_1, + expected); + } + { + auto input_0_param = 
make_shared(element::f32, data_shape); + auto input_1_param = make_shared(element::f32, data_shape); + auto sigmoid_0 = make_shared(input_0_param); + auto sigmoid_1 = make_shared(input_1_param); + vector expected{0.585304f, 0.876182f, 0.965887f, 0.990322f}; + op::ParameterVector input_params{input_0_param, input_1_param}; + vector> input_data{input_0_data, input_1_data}; + vector input_shapes{data_shape, data_shape}; + sigmoid_multiply_fusion_forward_compute(backend.get(), + input_params, + input_data, + input_shapes, + data_shape, + sigmoid_0, + sigmoid_1, + expected); + } + { + auto input_0_param = make_shared(element::f32, data_shape); + auto input_1_param = make_shared(element::f32, data_shape); + auto sigmoid_0 = make_shared(input_0_param); + auto sigmoid_1 = make_shared(input_1_param); + vector expected{0.634907f, 0.94484f, 0.993242f, 0.999164f}; + op::ParameterVector input_params{input_0_param, input_1_param}; + vector> input_data{input_0_data, input_1_data}; + vector input_shapes{data_shape, data_shape}; + sigmoid_multiply_fusion_forward_compute(backend.get(), + input_params, + input_data, + input_shapes, + data_shape, + sigmoid_0, + sigmoid_1, + expected); + } +} + +void sigmoid_multiply_fusion_backward_compute(runtime::Backend* backend, + const op::ParameterVector& input_params, + const vector>& input_data, + const vector& input_shapes, + const vector delta_data, + const Shape& delta_shape, + const Shape& d_input_0_shape, + const Shape& d_input_1_shape, + shared_ptr input_0_node, + shared_ptr input_1_node, + shared_ptr input_0_adjoint, + shared_ptr input_1_adjoint, + const vector& expected_0, + const vector& expected_1) +{ + vector> input_tensors; + for (int i = 0; i < input_params.size(); ++i) + { + input_tensors.push_back(backend->create_tensor(element::f32, input_shapes[i])); + copy_data(input_tensors[i], input_data[i]); + } + + auto delta_param = make_shared(element::f32, delta_shape); + shared_ptr delta_tensor = backend->create_tensor(element::f32, 
delta_shape); + copy_data(delta_tensor, delta_data); + + op::ParameterVector back_params(input_params); + back_params.push_back(delta_param); + input_tensors.push_back(delta_tensor); + + shared_ptr d_input_0_tensor = + backend->create_tensor(element::f32, d_input_0_shape); + shared_ptr d_input_1_tensor = + backend->create_tensor(element::f32, d_input_1_shape); + + using FunctionType = op::SigmoidMultiply::FunctionType; + auto input_0_type = op::SigmoidMultiply::identify_node_type(input_0_node); + auto input_1_type = op::SigmoidMultiply::identify_node_type(input_1_node); + // for Identity functions, we use the node itself, otherwise use its input + // where we will apply the function of input node + auto input_0_alt = + (input_0_type == FunctionType::Identity) ? input_0_node : input_0_node->get_argument(0); + auto input_1_alt = + (input_1_type == FunctionType::Identity) ? input_1_node : input_1_node->get_argument(0); + auto sigmoid_mul = + make_shared(input_0_alt, input_1_alt, input_0_type, input_1_type); + + ngraph::autodiff::Adjoints adjoints(NodeVector{sigmoid_mul}, NodeVector{delta_param}); + auto d_input_0 = adjoints.backprop_node(input_0_adjoint); + auto d_input_1 = adjoints.backprop_node(input_1_adjoint); + auto df = make_shared(NodeVector{d_input_0, d_input_1}, back_params); + backend->call_with_validate(df, {d_input_0_tensor, d_input_1_tensor}, input_tensors); + EXPECT_TRUE(test::all_close(read_vector(d_input_0_tensor), expected_0)); + EXPECT_TRUE(test::all_close(read_vector(d_input_1_tensor), expected_1)); +} + +TEST(cpu_fusion, sigmoid_multiply_fusion_backward) +{ + auto backend = runtime::Backend::create("CPU"); + + Shape data_shape{1, 1, 2, 2}; + Shape const_shape{1}; + + vector input_0_data{1.f, 2.f, 3.f, 4.f}; + vector input_1_data{1.2f, 2.2f, 3.2f, 4.2f}; + vector const_data{1.2f}; + vector delta_data(shape_size(data_shape), 20.0f); + + { + auto input_0_param = make_shared(element::f32, data_shape); + auto input_1_param = make_shared(element::f32, 
data_shape); + auto input_2_param = make_shared(element::f32, data_shape); + auto sigmoid_0 = make_shared(input_0_param); + auto sigmoid_1 = make_shared(input_1_param, input_2_param); + vector expected_0{8.65093f, 8.81946f, 5.60191f, 2.89668f}; + vector expected_1{14.6212f, 17.6159f, 19.0515f, 19.6403f}; + op::ParameterVector input_params{input_0_param, input_1_param, input_2_param}; + vector> input_data{input_0_data, input_0_data, input_1_data}; + vector input_shapes{data_shape, data_shape, data_shape}; + sigmoid_multiply_fusion_backward_compute(backend.get(), + input_params, + input_data, + input_shapes, + delta_data, + data_shape, + data_shape, + data_shape, + sigmoid_0, + sigmoid_1, + input_0_param, + sigmoid_1, + expected_0, + expected_1); + } + { + auto input_0_param = make_shared(element::f32, data_shape); + auto input_1_param = make_shared(element::f32, const_shape); + auto sigmoid_0 = make_shared(input_1_param, data_shape, AxisSet{1, 2, 3}); + auto sigmoid_1 = make_shared(input_0_param); + vector expected_0{15.2319f, 19.2806f, 19.9011f, 19.9866f}; + vector expected_1{10.0794f, 1.69562f, 0.236785f, 0.0321828f}; + op::ParameterVector input_params{input_0_param, input_1_param}; + vector> input_data{input_0_data, const_data}; + vector input_shapes{data_shape, const_shape}; + sigmoid_multiply_fusion_backward_compute(backend.get(), + input_params, + input_data, + input_shapes, + delta_data, + data_shape, + data_shape, + data_shape, + sigmoid_0, + sigmoid_1, + sigmoid_0, + input_0_param, + expected_0, + expected_1); + } + { + auto input_0_param = make_shared(element::f32, data_shape); + auto input_1_param = make_shared(element::f32, const_shape); + auto sigmoid_0 = make_shared(input_0_param); + auto sigmoid_1 = make_shared(input_1_param, data_shape, AxisSet{1, 2, 3}); + vector expected_0{10.0794f, 1.69562f, 0.236785f, 0.0321828f}; + vector expected_1{15.2319f, 19.2806f, 19.9011f, 19.9866f}; + op::ParameterVector input_params{input_0_param, input_1_param}; + 
vector> input_data{input_0_data, const_data}; + vector input_shapes{data_shape, const_shape}; + sigmoid_multiply_fusion_backward_compute(backend.get(), + input_params, + input_data, + input_shapes, + delta_data, + data_shape, + data_shape, + data_shape, + sigmoid_0, + sigmoid_1, + input_0_param, + sigmoid_1, + expected_0, + expected_1); + } + { + auto input_0_param = make_shared(element::f32, data_shape); + auto input_1_param = make_shared(element::f32, data_shape); + auto sigmoid_0 = make_shared(input_0_param); + auto sigmoid_1 = make_shared(input_1_param); + vector expected_0{3.02202f, 1.89041f, 0.868146f, 0.348035f}; + vector expected_1{2.60102f, 1.58192f, 0.716941f, 0.285879f}; + op::ParameterVector input_params{input_0_param, input_1_param}; + vector> input_data{input_0_data, input_1_data}; + vector input_shapes{data_shape, data_shape}; + sigmoid_multiply_fusion_backward_compute(backend.get(), + input_params, + input_data, + input_shapes, + delta_data, + data_shape, + data_shape, + data_shape, + sigmoid_0, + sigmoid_1, + input_0_param, + input_1_param, + expected_0, + expected_1); + } + { + auto input_0_param = make_shared(element::f32, data_shape); + auto input_1_param = make_shared(element::f32, data_shape); + auto sigmoid_0 = make_shared(input_0_param); + auto sigmoid_1 = make_shared(input_1_param); + vector expected_0{3.27813f, 2.04894f, 0.900536f, 0.353095f}; + vector expected_1{4.45975f, 0.84425f, 0.126201f, 0.0176579f}; + op::ParameterVector input_params{input_0_param, input_1_param}; + vector> input_data{input_0_data, input_1_data}; + vector input_shapes{data_shape, data_shape}; + sigmoid_multiply_fusion_backward_compute(backend.get(), + input_params, + input_data, + input_shapes, + delta_data, + data_shape, + data_shape, + data_shape, + sigmoid_0, + sigmoid_1, + input_0_param, + input_1_param, + expected_0, + expected_1); + } + { + auto input_0_param = make_shared(element::f32, data_shape); + auto input_1_param = make_shared(element::f32, data_shape); 
+ auto sigmoid_0 = make_shared(input_0_param); + auto sigmoid_1 = make_shared(input_1_param); + vector expected_0{6.45521f, 1.27207f, 0.189593f, 0.0264228f}; + vector expected_1{2.70967f, 1.7314f, 0.748913f, 0.29092f}; + op::ParameterVector input_params{input_0_param, input_1_param}; + vector> input_data{input_0_data, input_1_data}; + vector input_shapes{data_shape, data_shape}; + sigmoid_multiply_fusion_backward_compute(backend.get(), + input_params, + input_data, + input_shapes, + delta_data, + data_shape, + data_shape, + data_shape, + sigmoid_0, + sigmoid_1, + input_0_param, + input_1_param, + expected_0, + expected_1); + } + { + auto input_0_param = make_shared(element::f32, data_shape); + auto input_1_param = make_shared(element::f32, data_shape); + auto sigmoid_0 = make_shared(input_0_param); + auto sigmoid_1 = make_shared(input_1_param); + vector expected_0{7.00227f, 1.37874f, 0.196666f, 0.026807f}; + vector expected_1{4.64603f, 0.924027f, 0.131829f, 0.0179692f}; + op::ParameterVector input_params{input_0_param, input_1_param}; + vector> input_data{input_0_data, input_1_data}; + vector input_shapes{data_shape, data_shape}; + sigmoid_multiply_fusion_backward_compute(backend.get(), + input_params, + input_data, + input_shapes, + delta_data, + data_shape, + data_shape, + data_shape, + sigmoid_0, + sigmoid_1, + input_0_param, + input_1_param, + expected_0, + expected_1); + } +} + +TEST(cpu_fusion, fuse_batch_dot) +{ + pass::Manager pass_manager; + pass_manager.register_pass(); + const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/batch_dot_3.json"); + const string json_string = file_util::read_file_to_string(json_path); + stringstream ss(json_string); + shared_ptr func = ngraph::deserialize(ss); + pass_manager.run_passes(func); + size_t ccg = count_ops_of_type(func); + ASSERT_EQ(ccg, 1); +} + +TEST(cpu_fusion, fuse_batch_dot_forward) +{ + pass::Manager pass_manager; + pass_manager.register_pass(); + + const std::string 
file_name("mxnet/batch_dot_3.json"); + auto cpu_f = make_function(file_name); + auto int_f = make_function(file_name); + pass_manager.run_passes(cpu_f); + test::Uniform rng(0.0f, 1.0f); + vector> args; + + for (shared_ptr param : int_f->get_parameters()) + { + vector tensor_val(shape_size(param->get_shape())); + rng.initialize(tensor_val); + args.push_back(tensor_val); + } + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + for (size_t i = 0; i < int_results.size(); i++) + { + EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); + } +} + +TEST(cpu_fusion, fuse_rnn_across_layer) +{ + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass(); + pass_manager.register_pass(); + pass_manager.register_pass(); + const string json_path = + file_util::path_join(SERIALIZED_ZOO, "mxnet/2rnn_layer_1timestep.json"); + const string json_string = file_util::read_file_to_string(json_path); + stringstream ss(json_string); + shared_ptr func = ngraph::deserialize(ss); + pass_manager.run_passes(func); + size_t ref_rnn_count = 1; + auto rnn_count = count_ops_of_type(func); + EXPECT_EQ(ref_rnn_count, rnn_count); +} + +TEST(cpu_fusion, fuse_rnn_across_2layer_1timestep) +{ + const std::string file_name("mxnet/2rnn_layer_1timestep.json"); + auto cpu_f = make_function(file_name); + auto int_f = make_function(file_name); + test::Uniform rng(0.0f, 1.0f); + vector> args; + + for (shared_ptr param : int_f->get_parameters()) + { + vector tensor_val(shape_size(param->get_shape())); + rng.initialize(tensor_val); + args.push_back(tensor_val); + } + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + + // TODO (pruthvi): Enable this after fixing failing + // mxnet rnn unit tests + // EXPECT_EQ(1, count_ops_of_type(cpu_f)); + for (size_t i = 0; i < cpu_results.size(); i++) + { + 
EXPECT_TRUE(test::all_close(cpu_results.at(1), int_results.at(1), 1.0e-4f, 1.0e-4f)); + } +} + +static void check_bounded_relu(Shape param_shape, float constant_val) +{ + auto make_function = [](Shape input_shape, float alpha_val) { + auto relu_input = std::make_shared(element::f32, input_shape); + auto relu = std::make_shared(relu_input); + auto alpha = op::Constant::create( + element::f32, input_shape, std::vector(1.0f, alpha_val)); + auto min = std::make_shared(relu, alpha); + auto f = make_shared(NodeVector{min}, op::ParameterVector{relu_input}); + return f; + }; + + auto cpu_f = make_function(param_shape, constant_val); + auto int_f = make_function(param_shape, constant_val); + test::Uniform rng(-10.0f, 10.0f); + vector> args; + + for (shared_ptr param : int_f->get_parameters()) + { + vector tensor_val(shape_size(param->get_shape())); + rng.initialize(tensor_val); + args.push_back(tensor_val); + } + auto int_results = execute(int_f, args, "INTERPRETER"); + auto cpu_results = execute(cpu_f, args, "CPU"); + + EXPECT_EQ(1, count_ops_of_type(cpu_f)); + EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0), 1.0e-4f, 1.0e-4f)); +} + +TEST(cpu_fusion, fuse_bounded_relu_inter_vs_cpu) +{ + check_bounded_relu(Shape{4, 3, 2, 2}, 6.0f); + check_bounded_relu(Shape{4, 3}, 4.0f); + check_bounded_relu(Shape{4, 3, 2}, 2.0f); +} + +TEST(cpu_fusion, dot_batch_forward) +{ + const Shape shape_a{2, 3, 2}; + const Shape shape_b{2, 3}; + + auto generate_func = [&shape_a, &shape_b]() -> shared_ptr { + auto a = make_shared(element::f32, shape_a); + auto b = make_shared(element::f32, shape_b); + auto dot = make_shared(a, b); + return make_shared(dot, op::ParameterVector{a, b}); + }; + shared_ptr cpu_func = generate_func(); + shared_ptr int_func = generate_func(); + + test::Uniform rng(0.0f, 1.0f); + vector> args; + for (shared_ptr param : int_func->get_parameters()) + { + vector tensor_val(shape_size(param->get_shape())); + rng.initialize(tensor_val); + 
args.push_back(tensor_val); + } + + auto int_results = execute(int_func, args, "INTERPRETER"); + auto cpu_results = execute(cpu_func, args, "CPU"); + for (size_t i = 0; i < cpu_results.size(); i++) + { + EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); + } +} +static std::shared_ptr + create_rnn_input_linear_transformation_function(size_t num_timesteps, bool data_is_4d = false) +{ + auto W = std::make_shared(element::f32, Shape{400, 50}); + auto bias = std::make_shared(element::f32, Shape{400}); + op::ParameterVector params{W, bias}; + auto create_graph = [&]() -> std::shared_ptr { + + auto data_param = (data_is_4d) + ? std::make_shared(element::f32, Shape{2, 5, 1, 50}) + : std::make_shared(element::f32, Shape{10, 1, 50}); + params.push_back(data_param); + auto reshape_axis_order = data_is_4d ? AxisVector{0, 1, 2, 3} : AxisVector{0, 1, 2}; + auto data_param_reshape = + std::make_shared(data_param, reshape_axis_order, Shape{10, 50}); + auto W_reshape = std::make_shared(W, AxisVector{1, 0}, Shape{50, 400}); + auto dot = std::make_shared(data_param_reshape, W_reshape); + auto bias_broadcast = make_shared(bias, dot->get_shape(), AxisSet{0}); + auto add_bias = std::make_shared(dot, bias_broadcast); + return add_bias; + + }; + + NodeVector graph_nodes; + for (size_t i = 0; i < num_timesteps; i++) + { + graph_nodes.push_back(create_graph()); + } + auto concat = std::make_shared(graph_nodes, 0); + return make_shared(NodeVector{concat}, params); +} + +TEST(cpu_fusion, fuse_rnn_input_across_time_steps) +{ + auto func = create_rnn_input_linear_transformation_function(10); + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass(); + pass_manager.run_passes(func); + size_t ref_matmulbias_count = 1; + auto matmulbias_count = count_ops_of_type(func); + EXPECT_EQ(ref_matmulbias_count, matmulbias_count); +} + +TEST(cpu_fusion, fuse_rnn_input_across_time_steps_4d_data) +{ + auto func = 
create_rnn_input_linear_transformation_function(10, true); + pass::Manager pass_manager; + pass_manager.register_pass(); + pass_manager.register_pass(); + pass_manager.run_passes(func); + size_t ref_matmulbias_count = 10; // no CPURnnMatFusion transformations + auto matmulbias_count = count_ops_of_type(func); + EXPECT_EQ(ref_matmulbias_count, matmulbias_count); +} + +TEST(cpu_fusion, rnn_input_fusion_inter_vs_cpu) +{ + shared_ptr cpu_func = create_rnn_input_linear_transformation_function(10); + shared_ptr int_func = create_rnn_input_linear_transformation_function(10); + + test::Uniform rng(-10.0f, 10.0f); + vector> args; + for (shared_ptr param : int_func->get_parameters()) + { + vector tensor_val(shape_size(param->get_shape())); + rng.initialize(tensor_val); + args.push_back(tensor_val); + } + + auto int_results = execute(int_func, args, "INTERPRETER"); + auto cpu_results = execute(cpu_func, args, "CPU"); + for (size_t i = 0; i < cpu_results.size(); i++) + { + EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); + } +} diff --git a/test/type_prop.cpp b/test/type_prop.cpp index e2dcf156109..282ff1622f6 100644 --- a/test/type_prop.cpp +++ b/test/type_prop.cpp @@ -208,7 +208,7 @@ TEST(type_prop, batchnorm_training_rank_less_than_2) auto dummy = make_shared(element::f32, Shape{1}); try { - auto bc = make_shared(0.001, dummy, dummy, dummy); + auto bc = make_shared(dummy, dummy, dummy, 0.001); FAIL() << "BatchNorm c-tor should throw for tensors whose rank is less than 2"; } catch (const NodeValidationError& error) @@ -229,7 +229,7 @@ TEST(type_prop, batchnorm_training_zero_channel_check) auto beta = make_shared(element::f32, Shape{0}); try { - auto bc = make_shared(0.001, gamma, beta, data_batch); + auto bc = make_shared(data_batch, gamma, beta, 0.001); FAIL() << "BatchNorm c-tor should throw for tensors w/ zero-dimension channels"; } catch (const NodeValidationError& error) @@ -250,7 +250,7 @@ TEST(type_prop, 
batchnorm_training_et_check) try { - auto bc = make_shared(0.001, gamma, beta, data_batch); + auto bc = make_shared(data_batch, gamma, beta, 0.001); FAIL() << "BatchNorm c-tor should throw for different element types"; } catch (const NodeValidationError& error) @@ -271,7 +271,7 @@ TEST(type_prop, batchnorm_training_shape_check) try { - auto bc = make_shared(0.001, gamma, beta, data_batch); + auto bc = make_shared(data_batch, gamma, beta, 0.001); FAIL() << "BatchNorm c-tor should throw if gamma and beta shapes don't match"; } catch (const NodeValidationError& error) @@ -296,7 +296,7 @@ TEST(type_prop, batchnorm_training_backprop_et_check) try { auto bc = make_shared( - 0.001, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, 0.001); FAIL() << "Deduced type should disagree with c-tor arguments"; } catch (const NodeValidationError& error) @@ -321,7 +321,7 @@ TEST(type_prop, batchnorm_training_backprop_shape_check) try { auto bc = make_shared( - 0.001, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, 0.001); FAIL() << "Deduced type should disagree with c-tor arguments"; } catch (const NodeValidationError& error) @@ -345,7 +345,7 @@ TEST(type_prop, batchnorm_training_backprop_delta_check) try { auto bc = make_shared( - 0.001, dummy, dummy, param, dummy, dummy, delta); + param, dummy, dummy, dummy, dummy, delta, 0.001); FAIL() << "Deduced type should disagree with c-tor arguments"; } catch (const NodeValidationError& error) @@ -379,7 +379,7 @@ TEST(type_prop, batchnorm_inference_partial_all_rank_dynamic) auto mean = make_shared(mean_et, mean_shape); auto variance = make_shared(variance_et, variance_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch, mean, variance); + auto bn = make_shared(data_batch, gamma, beta, mean, variance, epsilon); ASSERT_EQ(bn->get_output_size(), 1); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -407,7 +407,7 @@ 
TEST(type_prop, batchnorm_inference_partial_input_rank_static_dynamic_ok) auto mean = make_shared(mean_et, mean_shape); auto variance = make_shared(variance_et, variance_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch, mean, variance); + auto bn = make_shared(data_batch, gamma, beta, mean, variance, epsilon); ASSERT_EQ(bn->get_output_size(), 1); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -439,7 +439,7 @@ TEST(type_prop, batchnorm_inference_partial_input_rank_static_dynamic_zero_chann try { auto bn = - make_shared(epsilon, gamma, beta, data_batch, mean, variance); + make_shared(data_batch, gamma, beta, mean, variance, epsilon); FAIL() << "Zero channel count not detected"; } catch (const NodeValidationError& error) @@ -472,7 +472,7 @@ TEST(type_prop, batchnorm_inference_partial_input_rank_dynamic_some_rank_static_ auto mean = make_shared(mean_et, mean_shape); auto variance = make_shared(variance_et, variance_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch, mean, variance); + auto bn = make_shared(data_batch, gamma, beta, mean, variance, epsilon); ASSERT_EQ(bn->get_output_size(), 1); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -502,7 +502,7 @@ TEST(type_prop, batchnorm_inference_partial_input_rank_dynamic_some_rank_static_ try { auto bn = - make_shared(epsilon, gamma, beta, data_batch, mean, variance); + make_shared(data_batch, gamma, beta, mean, variance, epsilon); FAIL() << "Wrong gamma/beta/mean/variance shape not detected"; } catch (const NodeValidationError& error) @@ -541,7 +541,7 @@ TEST(type_prop, try { auto bn = - make_shared(epsilon, gamma, beta, data_batch, mean, variance); + make_shared(data_batch, gamma, beta, mean, variance, epsilon); FAIL() << "Inconsistent gamma/beta/mean/variance shape not detected"; } catch (const NodeValidationError& error) @@ -579,7 +579,7 @@ TEST(type_prop, try { auto bn = - make_shared(epsilon, gamma, beta, data_batch, mean, variance); + make_shared(data_batch, 
gamma, beta, mean, variance, epsilon); FAIL() << "Inconsistent gamma/beta/mean/variance channel count not detected"; } catch (const NodeValidationError& error) @@ -613,7 +613,7 @@ TEST(type_prop, batchnorm_inference_partial_input_rank_static_dynamic_some_stati auto mean = make_shared(mean_et, mean_shape); auto variance = make_shared(variance_et, variance_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch, mean, variance); + auto bn = make_shared(data_batch, gamma, beta, mean, variance, epsilon); ASSERT_EQ(bn->get_output_size(), 1); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -645,7 +645,7 @@ TEST(type_prop, try { auto bn = - make_shared(epsilon, gamma, beta, data_batch, mean, variance); + make_shared(data_batch, gamma, beta, mean, variance, epsilon); FAIL() << "Inconsistent input/gamma/beta/mean/variance channel count not detected"; } catch (const NodeValidationError& error) @@ -674,7 +674,7 @@ TEST(type_prop, batchnorm_training_partial_all_rank_dynamic) auto gamma = make_shared(gamma_et, gamma_shape); auto beta = make_shared(beta_et, beta_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -700,7 +700,7 @@ TEST(type_prop, batchnorm_training_partial_input_rank_static_dynamic_batch_size_ auto gamma = make_shared(gamma_et, gamma_shape); auto beta = make_shared(beta_et, beta_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -727,7 +727,7 @@ TEST(type_prop, batchnorm_training_partial_input_rank_static_dynamic_channel_cou auto gamma = make_shared(gamma_et, gamma_shape); auto beta = make_shared(beta_et, beta_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = 
make_shared(data_batch, gamma, beta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -755,7 +755,7 @@ TEST(type_prop, batchnorm_training_partial_input_rank_static_dynamic_zero_channe auto beta = make_shared(beta_et, beta_shape); try { - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); FAIL() << "Zero channel count not detected"; } catch (const NodeValidationError& error) @@ -782,7 +782,7 @@ TEST(type_prop, batchnorm_training_partial_input_rank_dynamic_some_rank_static_d auto gamma = make_shared(gamma_et, gamma_shape); auto beta = make_shared(beta_et, beta_shape); - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -809,7 +809,7 @@ TEST(type_prop, batchnorm_training_partial_input_rank_dynamic_some_rank_static_d try { - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); FAIL() << "Wrong gamma/beta shape not detected"; } catch (const NodeValidationError& error) @@ -840,7 +840,7 @@ TEST(type_prop, try { - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); FAIL() << "Inconsistent gamma/beta shape not detected"; } catch (const NodeValidationError& error) @@ -870,7 +870,7 @@ TEST(type_prop, try { - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); FAIL() << "Inconsistent gamma/beta channel count not detected"; } catch (const NodeValidationError& error) @@ -897,7 +897,7 @@ TEST(type_prop, batchnorm_training_partial_input_rank_static_dynamic_some_static auto gamma = make_shared(gamma_et, gamma_shape); auto beta = make_shared(beta_et, beta_shape); - auto bn = make_shared(epsilon, gamma, beta, 
data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -926,7 +926,7 @@ TEST(type_prop, try { - auto bn = make_shared(epsilon, gamma, beta, data_batch); + auto bn = make_shared(data_batch, gamma, beta, epsilon); FAIL() << "Inconsistent input/gamma/beta channel count not detected"; } catch (const NodeValidationError& error) @@ -970,7 +970,7 @@ TEST(type_prop, batchnorm_training_backprop_partial_all_rank_dynamic) auto delta = make_shared(delta_et, delta_shape); auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -1006,7 +1006,7 @@ TEST(type_prop, batchnorm_training_backprop_partial_input_rank_static_dynamic_ok auto delta = make_shared(delta_et, delta_shape); auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -1045,7 +1045,7 @@ TEST(type_prop, batchnorm_training_backprop_partial_input_rank_static_dynamic_ze try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "Zero channel count not detected"; } catch (const NodeValidationError& error) @@ -1082,7 +1082,7 @@ TEST(type_prop, batchnorm_training_backprop_partial_delta_rank_static_dynamic_ok auto delta = make_shared(delta_et, delta_shape); auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -1118,7 +1118,7 @@ TEST(type_prop, 
batchnorm_training_backprop_partial_delta_rank_static_dynamic_ch auto delta = make_shared(delta_et, delta_shape); auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -1156,7 +1156,7 @@ TEST(type_prop, batchnorm_training_backprop_partial_delta_rank_static_dynamic_ze try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "Zero channel count not detected"; } catch (const NodeValidationError& error) @@ -1194,7 +1194,7 @@ TEST(type_prop, auto delta = make_shared(delta_et, delta_shape); auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -1233,7 +1233,7 @@ TEST( try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "Wrong gamma/beta/mean/variance shape not detected"; } catch (const NodeValidationError& error) @@ -1276,7 +1276,7 @@ TEST( try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "Wrong gamma/beta/mean/variance shape not detected"; } catch (const NodeValidationError& error) @@ -1318,7 +1318,7 @@ TEST( try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "nconsistent gamma/beta/mean/variance channel count not detected"; } catch (const NodeValidationError& error) @@ -1357,7 +1357,7 @@ TEST(type_prop, auto delta = make_shared(delta_et, delta_shape); auto bn = make_shared( - 
epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); ASSERT_EQ(bn->get_output_size(), 3); ASSERT_EQ(bn->get_output_element_type(0), data_batch_et); @@ -1396,7 +1396,7 @@ TEST( try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "Inconsistent delta/gamma/beta/mean/variance channel count not detected"; } catch (const NodeValidationError& error) @@ -1439,7 +1439,7 @@ TEST( try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "Inconsistent input/delta batch size not detected"; } catch (const NodeValidationError& error) @@ -1483,7 +1483,7 @@ TEST( try { auto bn = make_shared( - epsilon, gamma, beta, data_batch, mean, variance, delta); + data_batch, gamma, beta, mean, variance, delta, epsilon); FAIL() << "Inconsistent input/delta spatial dimensions not detected"; } catch (const NodeValidationError& error) From 0c7ff9c858b25dedff2f215ce20d535b3cff1cc9 Mon Sep 17 00:00:00 2001 From: Adam Procter Date: Mon, 12 Nov 2018 16:02:06 -0800 Subject: [PATCH 02/10] Update doc/sphinx/source/ops/batch_norm_training_backprop.rst Co-Authored-By: diyessi --- doc/sphinx/source/ops/batch_norm_training_backprop.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx/source/ops/batch_norm_training_backprop.rst b/doc/sphinx/source/ops/batch_norm_training_backprop.rst index 68004bbf092..f759cc1fbf1 100644 --- a/doc/sphinx/source/ops/batch_norm_training_backprop.rst +++ b/doc/sphinx/source/ops/batch_norm_training_backprop.rst @@ -30,7 +30,7 @@ Inputs +----------------------+-------------------------+------------------------------+ | ``variance`` | same as ``input`` | :math:`(C)` | +----------------------+-------------------------+------------------------------+ -| ``normalized_delta`` | same as 
``input`` | :math:`input` | +| ``normalized_delta`` | same as ``input`` | same as ``input`` | +----------------------+-------------------------+------------------------------+ From 9ac7c3d6825bd846541a6eac7889964d0ad178f6 Mon Sep 17 00:00:00 2001 From: Scott Cyphers Date: Mon, 12 Nov 2018 16:04:35 -0800 Subject: [PATCH 03/10] Remove unwanted files --- test/backend_test.in.cpp-9bfce850 | 5571 ----------------------------- test/cpu_fusion.cpp-41c1ba06 | 3132 ---------------- 2 files changed, 8703 deletions(-) delete mode 100644 test/backend_test.in.cpp-9bfce850 delete mode 100644 test/cpu_fusion.cpp-41c1ba06 diff --git a/test/backend_test.in.cpp-9bfce850 b/test/backend_test.in.cpp-9bfce850 deleted file mode 100644 index 1b58addb5bd..00000000000 --- a/test/backend_test.in.cpp-9bfce850 +++ /dev/null @@ -1,5571 +0,0 @@ -//***************************************************************************** -// Copyright 2017-2018 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-//***************************************************************************** - -#include -#include -#include -#include -#include -#include -#include -#include "gtest/gtest.h" - -#include "ngraph/autodiff/adjoints.hpp" -#include "ngraph/graph_util.hpp" -#include "ngraph/log.hpp" -#include "ngraph/ngraph.hpp" -#include "ngraph/op/experimental/generate_mask.hpp" -#include "ngraph/serializer.hpp" -#include "ngraph/state/rng_state.hpp" -#include "util/all_close.hpp" -#include "util/all_close_f.hpp" -#include "util/ndarray.hpp" -#include "util/random.hpp" -#include "util/test_control.hpp" -#include "util/test_tools.hpp" - -using namespace std; -using namespace ngraph; - -static string s_manifest = "${MANIFEST}"; - -static const vector s_known_element_types = {element::from(), - element::from(), - element::from(), - element::from(), - element::from(), - element::from(), - element::from(), - element::from(), - element::from(), - element::from()}; - -class UnhandledOp : public ngraph::op::Op -{ -public: - UnhandledOp(const std::shared_ptr& arg) - : Op("Unsupported_op", check_single_output_args({arg})) - { - constructor_validate_and_infer_types(); - } - shared_ptr copy_with_new_args(const NodeVector& new_args) const override - { - return make_shared(new_args[0]); - } - -protected: - void validate_and_infer_types() override - { - set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); - } -}; - -NGRAPH_TEST(${BACKEND_NAME}, unhandled_op) -{ - Shape shape{2, 2}; - auto A = make_shared(element::f32, shape); - auto unhandled = make_shared(A); - auto f = make_shared(unhandled, op::ParameterVector{A}); - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - shared_ptr a = backend->create_tensor(shape); - shared_ptr result = backend->create_tensor(shape); - ASSERT_THROW(backend->call_with_validate(f, {result}, {a}), unsupported_op); -} - -NGRAPH_TEST(${BACKEND_NAME}, function_name) -{ - Shape shape{2, 2}; - auto A = make_shared(element::f32, shape); 
- auto B = make_shared(element::f32, shape); - auto f = make_shared(A + B, op::ParameterVector{A, B}, "funky func name"); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - shared_ptr a = backend->create_tensor(shape); - shared_ptr b = backend->create_tensor(shape); - shared_ptr result = backend->create_tensor(shape); - - copy_data(a, test::NDArray({{1, 2}, {3, 4}}).get_vector()); - copy_data(b, test::NDArray({{5, 6}, {7, 8}}).get_vector()); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ(read_vector(result), - (test::NDArray({{6, 8}, {10, 12}})).get_vector()); -} - -NGRAPH_TEST(${BACKEND_NAME}, node_name) -{ - Shape shape{2, 2}; - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto C = A + B; - C->set_name("a node name"); - auto f = make_shared(C, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - shared_ptr a = backend->create_tensor(element::f32, shape); - shared_ptr b = backend->create_tensor(element::f32, shape); - shared_ptr result = backend->create_tensor(element::f32, shape); - - copy_data(a, test::NDArray({{1, 2}, {3, 4}}).get_vector()); - copy_data(b, test::NDArray({{5, 6}, {7, 8}}).get_vector()); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ(read_vector(result), - (test::NDArray({{6, 8}, {10, 12}})).get_vector()); -} - -NGRAPH_TEST(${BACKEND_NAME}, aliased_output) -{ - Shape shape{2, 2}; - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto C = A + B; - auto D = A * B; - auto E = op::Constant::create(element::f32, shape, {1, 2, 3, 4}); - auto f = make_shared(NodeVector{C, C, D, D, C, E, E}, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - shared_ptr a = backend->create_tensor(element::f32, shape); - 
shared_ptr b = backend->create_tensor(element::f32, shape); - shared_ptr out1 = backend->create_tensor(element::f32, shape); - shared_ptr out2 = backend->create_tensor(element::f32, shape); - shared_ptr out3 = backend->create_tensor(element::f32, shape); - shared_ptr out4 = backend->create_tensor(element::f32, shape); - shared_ptr out5 = backend->create_tensor(element::f32, shape); - shared_ptr out6 = backend->create_tensor(element::f32, shape); - shared_ptr out7 = backend->create_tensor(element::f32, shape); - - copy_data(a, vector{0, 1, 2, 3}); - copy_data(b, vector{1, 2, 3, 4}); - vector expectedC{1, 3, 5, 7}; - vector expectedD{0, 2, 6, 12}; - vector expectedE{1, 2, 3, 4}; - - backend->call_with_validate(f, {out1, out2, out3, out4, out5, out6, out7}, {a, b}); - EXPECT_EQ(expectedC, read_vector(out1)); - EXPECT_EQ(expectedC, read_vector(out2)); - EXPECT_EQ(expectedD, read_vector(out3)); - EXPECT_EQ(expectedD, read_vector(out4)); - EXPECT_EQ(expectedC, read_vector(out5)); - EXPECT_EQ(expectedE, read_vector(out6)); - EXPECT_EQ(expectedE, read_vector(out7)); -} - -NGRAPH_TEST(${BACKEND_NAME}, parameter_as_output) -{ - Shape shape{3, 4}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(A, op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - shared_ptr a = backend->create_tensor(element::f32, shape); - shared_ptr result = backend->create_tensor(element::f32, shape); - - vector expected{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; - vector zero(shape_size(shape), 0); - copy_data(a, expected); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ(read_vector(result), expected); -} - -NGRAPH_TEST(${BACKEND_NAME}, abc) -{ - Shape shape{2, 2}; - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto C = make_shared(element::f32, shape); - auto f = make_shared((A + B) * C, op::ParameterVector{A, B, C}); - - auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - shared_ptr a = backend->create_tensor(element::f32, shape); - shared_ptr b = backend->create_tensor(element::f32, shape); - shared_ptr c = backend->create_tensor(element::f32, shape); - shared_ptr result = backend->create_tensor(element::f32, shape); - - copy_data(a, test::NDArray({{1, 2}, {3, 4}}).get_vector()); - copy_data(b, test::NDArray({{5, 6}, {7, 8}}).get_vector()); - copy_data(c, test::NDArray({{9, 10}, {11, 12}}).get_vector()); - - backend->call_with_validate(f, {result}, {a, b, c}); - EXPECT_EQ(read_vector(result), - (test::NDArray({{54, 80}, {110, 144}})).get_vector()); - - backend->call_with_validate(f, {result}, {b, a, c}); - EXPECT_EQ(read_vector(result), - (test::NDArray({{54, 80}, {110, 144}})).get_vector()); - - backend->call_with_validate(f, {result}, {a, c, b}); - EXPECT_EQ(read_vector(result), - (test::NDArray({{50, 72}, {98, 128}})).get_vector()); -} - -NGRAPH_TEST(${BACKEND_NAME}, abc_int64) -{ - Shape shape{2, 2}; - auto A = make_shared(element::i64, shape); - auto B = make_shared(element::i64, shape); - auto C = make_shared(element::i64, shape); - auto f = make_shared((A + B) * C, op::ParameterVector{A, B, C}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::i64, shape); - copy_data(a, vector{1, 2, 3, 4}); - auto b = backend->create_tensor(element::i64, shape); - copy_data(b, vector{5, 6, 7, 8}); - auto c = backend->create_tensor(element::i64, shape); - copy_data(c, vector{9, 10, 11, 12}); - auto result = backend->create_tensor(element::i64, shape); - - backend->call_with_validate(f, {result}, {a, b, c}); - EXPECT_EQ((vector{54, 80, 110, 144}), read_vector(result)); - - backend->call_with_validate(f, {result}, {b, a, c}); - EXPECT_EQ((vector{54, 80, 110, 144}), read_vector(result)); - - backend->call_with_validate(f, {result}, {a, c, b}); - 
EXPECT_EQ((vector{50, 72, 98, 128}), read_vector(result)); -} - -// Multiple retrive values -NGRAPH_TEST(${BACKEND_NAME}, multiple_result) -{ - Shape shape{2, 2}; - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto C = make_shared(element::f32, shape); - auto A_add_B = make_shared(A, B); - auto A_add_B_mul_C = make_shared(A_add_B, C); - - auto f = - make_shared(NodeVector{A_add_B, A_add_B_mul_C}, op::ParameterVector{A, B, C}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{1, 2, 3, 4}); - auto b = backend->create_tensor(element::f32, shape); - copy_data(b, vector{5, 6, 7, 8}); - auto c = backend->create_tensor(element::f32, shape); - copy_data(c, vector{9, 10, 11, 12}); - - auto r0 = backend->create_tensor(element::f32, shape); - auto r1 = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {r0, r1}, {a, b, c}); - - EXPECT_EQ((vector{6, 8, 10, 12}), read_vector(r0)); - EXPECT_EQ((vector{54, 80, 110, 144}), read_vector(r1)); -} - -NGRAPH_TEST(${BACKEND_NAME}, batch_norm_one_output) -{ - auto shape_in = Shape{2, 3}; - auto shape_mean = Shape{3}; - - auto A = make_shared(element::f64, shape_in); - auto Mean = - op::Constant::create(element::f64, shape_mean, {0.00396654, -1.25294404, 1.16651872}); - auto Variance = - op::Constant::create(element::f64, shape_mean, {2.40871689, 1.44969511, 0.23469392}); - auto Beta = - op::Constant::create(element::f64, shape_mean, {2.14211921, -0.75733924, 0.42210531}); - auto Gamma = - op::Constant::create(element::f64, shape_mean, {1.75437676, 0.37950502, 1.13727544}); - - auto BN = make_shared(A, Gamma, Beta, Mean, Variance, 1e-3); - auto f = make_shared(BN, op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f64, shape_in); - copy_data( - a, - 
vector{-1.97431703, -2.06521307, 0.54122217, 2.53375939, -0.22342691, 0.45340773}); - - auto result = backend->create_tensor(element::f64, shape_in); - vector expected_result{ - -0.09365749, -1.01327395, -1.04269195, 5.00118923, -0.43295258, -1.24840283}; - - backend->call_with_validate(f, {result}, {a}); - EXPECT_TRUE(test::all_close(vector{expected_result}, read_vector(result))); -} - -NGRAPH_TEST(${BACKEND_NAME}, batch_norm_three_outputs) -{ - auto shape_in = Shape{2, 3}; - auto shape_mean = Shape{3}; - - auto A = make_shared(element::f64, shape_in); - auto Beta = - op::Constant::create(element::f64, shape_mean, {2.14211921, -0.75733924, 0.42210531}); - auto Gamma = - op::Constant::create(element::f64, shape_mean, {1.75437676, 0.37950502, 1.13727544}); - - auto BN = make_shared(A, Gamma, Beta, 1e-3); - - auto f0 = - make_shared(make_shared(BN, 0), op::ParameterVector{A}); - auto f1 = - make_shared(make_shared(BN, 1), op::ParameterVector{A}); - auto f2 = - make_shared(make_shared(BN, 2), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f64, shape_in); - copy_data( - a, - vector{-1.97431703, -2.06521307, 0.54122217, 2.53375939, -0.22342691, 0.45340773}); - - auto result0 = backend->create_tensor(element::f64, shape_in); - vector expected_result0{ - 0.3879149, -1.13662076, 1.34494817, 3.89632344, -0.37805778, -0.50073695}; - - backend->call_with_validate(f0, {result0}, {a}); - EXPECT_TRUE(test::all_close(vector{expected_result0}, read_vector(result0))); - - auto result1 = backend->create_tensor(element::f64, shape_mean); - vector expected_result1{0.27972114, -1.14431989, 0.49731493}; - - backend->call_with_validate(f1, {result1}, {a}); - EXPECT_TRUE(test::all_close(vector{expected_result1}, read_vector(result1))); - - auto result2 = backend->create_tensor(element::f64, shape_mean); - vector expected_result2{5.08068895e+00, 8.48043919e-01, 
1.92784308e-03}; - - backend->call_with_validate(f2, {result2}, {a}); - EXPECT_TRUE(test::all_close(vector{expected_result2}, read_vector(result2))); -} - -NGRAPH_TEST(${BACKEND_NAME}, concat_matrix_colwise) -{ - Shape shape_a{2, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{2, 3}; - auto B = make_shared(element::f32, shape_b); - Shape shape_c{2, 3}; - auto C = make_shared(element::f32, shape_c); - Shape shape_r{2, 8}; - auto f = make_shared(make_shared(NodeVector{A, B, C}, 1), - op::ParameterVector{A, B, C}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{2, 4, 8, 16}); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{1, 2, 4, 8, 16, 32}); - auto c = backend->create_tensor(element::f32, shape_c); - copy_data(c, vector{2, 3, 5, 7, 11, 13}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b, c}); - EXPECT_EQ((vector{2, 4, 1, 2, 4, 2, 3, 5, 8, 16, 8, 16, 32, 7, 11, 13}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, concat_matrix_rowwise) -{ - Shape shape_a{2, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{3, 2}; - auto B = make_shared(element::f32, shape_b); - Shape shape_c{3, 2}; - auto C = make_shared(element::f32, shape_c); - Shape shape_r{8, 2}; - auto f = make_shared(make_shared(NodeVector{A, B, C}, 0), - op::ParameterVector{A, B, C}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{2, 4, 8, 16}); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{1, 2, 4, 8, 16, 32}); - auto c = backend->create_tensor(element::f32, shape_c); - copy_data(c, vector{2, 3, 5, 7, 11, 13}); - auto result = 
backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b, c}); - EXPECT_EQ((vector{2, 4, 8, 16, 1, 2, 4, 8, 16, 32, 2, 3, 5, 7, 11, 13}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, concat_matrix_int64) -{ - Shape shape_a{2, 2}; - auto A = make_shared(element::i64, shape_a); - Shape shape_b{3, 2}; - auto B = make_shared(element::i64, shape_b); - Shape shape_c{3, 2}; - auto C = make_shared(element::i64, shape_c); - Shape shape_r{8, 2}; - auto f = make_shared(make_shared(NodeVector{A, B, C}, 0), - op::ParameterVector{A, B, C}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::i64, shape_a); - copy_data(a, vector{2, 4, 8, 16}); - auto b = backend->create_tensor(element::i64, shape_b); - copy_data(b, vector{1, 2, 4, 8, 16, 32}); - auto c = backend->create_tensor(element::i64, shape_c); - copy_data(c, vector{2, 3, 5, 7, 11, 13}); - auto result = backend->create_tensor(element::i64, shape_r); - - backend->call_with_validate(f, {result}, {a, b, c}); - EXPECT_EQ((vector{2, 4, 8, 16, 1, 2, 4, 8, 16, 32, 2, 3, 5, 7, 11, 13}), - read_vector(result)); -} - -// Params to drive concat_vector_large testing variations -class concat_vector_params : public ::testing::TestWithParam -{ -protected: - concat_vector_params() { num_inputs = GetParam(); } - uint32_t num_inputs; -}; - -NGRAPH_TEST_P(${BACKEND_NAME}, concat_vector_params, concat_vector_large) -{ - Shape shape_a{1}; - NodeVector inputs; - op::ParameterVector inputs_param; - for (uint32_t i = 0; i < num_inputs; i++) - { - auto A = make_shared(element::f32, shape_a); - inputs_param.push_back(A); - inputs.push_back(A); - } - Shape shape_r{num_inputs}; - auto f = make_shared(make_shared(inputs, 0), inputs_param); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - std::vector> inputs_value; - std::vector ref_result; - 
for (uint32_t i = 0; i < num_inputs; i++) - { - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{static_cast(i)}); - ref_result.push_back(static_cast(i)); - inputs_value.push_back(a); - } - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, inputs_value); - EXPECT_EQ(ref_result, read_vector(result)); -} - -// concat_vector_large case generation -// Add thhosw tests to cover paramter space overflow: -// cuda kernel parameter space have limit, if there is large number of parameters, -// there will be overflow for parameter space. -NGRAPH_INSTANTIATE_TEST_CASE_P(${BACKEND_NAME}, - input_sizes, - concat_vector_params, - testing::Values(100, 128, 999)); - -NGRAPH_TEST(${BACKEND_NAME}, concat_vector) -{ - Shape shape_a{4}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{6}; - auto B = make_shared(element::f32, shape_b); - Shape shape_c{2}; - auto C = make_shared(element::f32, shape_c); - Shape shape_r{12}; - auto f = make_shared(make_shared(NodeVector{A, B, C}, 0), - op::ParameterVector{A, B, C}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{2, 4, 8, 16}); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{1, 2, 4, 8, 16, 32}); - auto c = backend->create_tensor(element::f32, shape_c); - copy_data(c, vector{18, 19}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b, c}); - EXPECT_EQ((vector{2, 4, 8, 16, 1, 2, 4, 8, 16, 32, 18, 19}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, concat_4d_tensor) -{ - Shape shape{1, 1, 1, 1}; - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto C = make_shared(element::f32, shape); - Shape shape_r{3, 1, 1, 1}; - auto f = 
make_shared(make_shared(NodeVector{A, B, C}, 0), - op::ParameterVector{A, B, C}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{1}); - auto b = backend->create_tensor(element::f32, shape); - copy_data(b, vector{2}); - auto c = backend->create_tensor(element::f32, shape); - copy_data(c, vector{3}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b, c}); - EXPECT_EQ((vector{1, 2, 3}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, concat_2d_tensor) -{ - Shape shape{1, 1}; - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto C = make_shared(element::f32, shape); - Shape shape_r{3, 1}; - auto f = make_shared(make_shared(NodeVector{A, B, C}, 0), - op::ParameterVector{A, B, C}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{1}); - auto b = backend->create_tensor(element::f32, shape); - copy_data(b, vector{2}); - auto c = backend->create_tensor(element::f32, shape); - copy_data(c, vector{3}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b, c}); - EXPECT_EQ((vector{1, 2, 3}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_2d_tensor) -{ - Shape shape{1, 1}; - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto add1 = make_shared(A, B); - auto C = make_shared(element::f32, shape); - auto D = make_shared(element::f32, shape); - auto add2 = make_shared(C, D); - auto subtract = make_shared(C, A); - Shape shape_r{3, 1}; - auto f = make_shared(make_shared(NodeVector{add1, add2, subtract}, 0), - op::ParameterVector{A, B, C, D}); - - auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{1}); - auto b = backend->create_tensor(element::f32, shape); - copy_data(b, vector{2}); - auto c = backend->create_tensor(element::f32, shape); - copy_data(c, vector{3}); - auto d = backend->create_tensor(element::f32, shape); - copy_data(d, vector{4}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b, c, d}); - EXPECT_EQ((vector{3, 7, 2}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, concat_in_place_propagate_2d_tensor) -{ - Shape shape{1, 1}; - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto add1 = make_shared(A, B); - auto C = make_shared(element::f32, shape); - auto D = make_shared(element::f32, shape); - auto add2 = make_shared(C, D); - auto concat1 = make_shared(NodeVector{add1, add2}, 0); - auto subtract = make_shared(C, A); - Shape shape_r{3, 1}; - auto f = make_shared(make_shared(NodeVector{concat1, subtract}, 0), - op::ParameterVector{A, B, C, D}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{1}); - auto b = backend->create_tensor(element::f32, shape); - copy_data(b, vector{2}); - auto c = backend->create_tensor(element::f32, shape); - copy_data(c, vector{3}); - auto d = backend->create_tensor(element::f32, shape); - copy_data(d, vector{4}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b, c, d}); - EXPECT_EQ((vector{3, 7, 2}), read_vector(result)); -} - -// from numpy import * -// a=linspace(1,2*3*4*3*2,2*3*4*3*2) -// b=linspace(1000+1,1000+2*3*3*3*2,2*3*3*3*2) -// c=linspace(2000+1,2000+2*3*2*3*2,2*3*2*3*2) -// a.shape=(2,3,4,3,2) -// b.shape=(2,3,3,3,2) -// 
c.shape=(2,3,2,3,2) -// z=concatenate((a,b,c),axis=2) -// z.shape=(2*3*(4+3+2)*3*2) -// set_printoptions(suppress=True) -// print(z) -// -// [ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. -// 11. 12. 13. 14. 15. 16. 17. 18. 19. 20. -// 21. 22. 23. 24. 1001. 1002. 1003. 1004. 1005. 1006. -// 1007. 1008. 1009. 1010. 1011. 1012. 1013. 1014. 1015. 1016. -// 1017. 1018. 2001. 2002. 2003. 2004. 2005. 2006. 2007. 2008. -// 2009. 2010. 2011. 2012. 25. 26. 27. 28. 29. 30. -// 31. 32. 33. 34. 35. 36. 37. 38. 39. 40. -// 41. 42. 43. 44. 45. 46. 47. 48. 1019. 1020. -// 1021. 1022. 1023. 1024. 1025. 1026. 1027. 1028. 1029. 1030. -// 1031. 1032. 1033. 1034. 1035. 1036. 2013. 2014. 2015. 2016. -// 2017. 2018. 2019. 2020. 2021. 2022. 2023. 2024. 49. 50. -// 51. 52. 53. 54. 55. 56. 57. 58. 59. 60. -// 61. 62. 63. 64. 65. 66. 67. 68. 69. 70. -// 71. 72. 1037. 1038. 1039. 1040. 1041. 1042. 1043. 1044. -// 1045. 1046. 1047. 1048. 1049. 1050. 1051. 1052. 1053. 1054. -// 2025. 2026. 2027. 2028. 2029. 2030. 2031. 2032. 2033. 2034. -// 2035. 2036. 73. 74. 75. 76. 77. 78. 79. 80. -// 81. 82. 83. 84. 85. 86. 87. 88. 89. 90. -// 91. 92. 93. 94. 95. 96. 1055. 1056. 1057. 1058. -// 1059. 1060. 1061. 1062. 1063. 1064. 1065. 1066. 1067. 1068. -// 1069. 1070. 1071. 1072. 2037. 2038. 2039. 2040. 2041. 2042. -// 2043. 2044. 2045. 2046. 2047. 2048. 97. 98. 99. 100. -// 101. 102. 103. 104. 105. 106. 107. 108. 109. 110. -// 111. 112. 113. 114. 115. 116. 117. 118. 119. 120. -// 1073. 1074. 1075. 1076. 1077. 1078. 1079. 1080. 1081. 1082. -// 1083. 1084. 1085. 1086. 1087. 1088. 1089. 1090. 2049. 2050. -// 2051. 2052. 2053. 2054. 2055. 2056. 2057. 2058. 2059. 2060. -// 121. 122. 123. 124. 125. 126. 127. 128. 129. 130. -// 131. 132. 133. 134. 135. 136. 137. 138. 139. 140. -// 141. 142. 143. 144. 1091. 1092. 1093. 1094. 1095. 1096. -// 1097. 1098. 1099. 1100. 1101. 1102. 1103. 1104. 1105. 1106. -// 1107. 1108. 2061. 2062. 2063. 2064. 2065. 2066. 2067. 2068. -// 2069. 2070. 2071. 2072.] 
-NGRAPH_TEST(${BACKEND_NAME}, concat_5d) -{ - vector a_data(2 * 3 * 4 * 3 * 2); - for (int i = 0; i < 2 * 3 * 4 * 3 * 2; i++) - { - a_data[i] = float(i + 1); - } - - vector b_data(2 * 3 * 3 * 3 * 2); - for (int i = 0; i < 2 * 3 * 3 * 3 * 2; i++) - { - b_data[i] = 1000 + float(i + 1); - } - - vector c_data(2 * 3 * 2 * 3 * 2); - for (int i = 0; i < 2 * 3 * 2 * 3 * 2; i++) - { - c_data[i] = 2000 + float(i + 1); - } - - Shape shape_a{2, 3, 4, 3, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{2, 3, 3, 3, 2}; - auto B = make_shared(element::f32, shape_b); - Shape shape_c{2, 3, 2, 3, 2}; - auto C = make_shared(element::f32, shape_c); - Shape shape_r{2, 3, 9, 3, 2}; - - auto r = make_shared(NodeVector{A, B, C}, 2); - auto f = make_shared(r, op::ParameterVector{A, B, C}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, a_data); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, b_data); - auto c = backend->create_tensor(element::f32, shape_c); - copy_data(c, c_data); - - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b, c}); - EXPECT_EQ( - (vector{ - 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., - 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., - 1001., 1002., 1003., 1004., 1005., 1006., 1007., 1008., 1009., 1010., 1011., 1012., - 1013., 1014., 1015., 1016., 1017., 1018., 2001., 2002., 2003., 2004., 2005., 2006., - 2007., 2008., 2009., 2010., 2011., 2012., 25., 26., 27., 28., 29., 30., - 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., 42., - 43., 44., 45., 46., 47., 48., 1019., 1020., 1021., 1022., 1023., 1024., - 1025., 1026., 1027., 1028., 1029., 1030., 1031., 1032., 1033., 1034., 1035., 1036., - 2013., 2014., 2015., 2016., 2017., 2018., 2019., 2020., 2021., 2022., 2023., 2024., - 49., 50., 51., 52., 53., 54., 55., 
56., 57., 58., 59., 60., - 61., 62., 63., 64., 65., 66., 67., 68., 69., 70., 71., 72., - 1037., 1038., 1039., 1040., 1041., 1042., 1043., 1044., 1045., 1046., 1047., 1048., - 1049., 1050., 1051., 1052., 1053., 1054., 2025., 2026., 2027., 2028., 2029., 2030., - 2031., 2032., 2033., 2034., 2035., 2036., 73., 74., 75., 76., 77., 78., - 79., 80., 81., 82., 83., 84., 85., 86., 87., 88., 89., 90., - 91., 92., 93., 94., 95., 96., 1055., 1056., 1057., 1058., 1059., 1060., - 1061., 1062., 1063., 1064., 1065., 1066., 1067., 1068., 1069., 1070., 1071., 1072., - 2037., 2038., 2039., 2040., 2041., 2042., 2043., 2044., 2045., 2046., 2047., 2048., - 97., 98., 99., 100., 101., 102., 103., 104., 105., 106., 107., 108., - 109., 110., 111., 112., 113., 114., 115., 116., 117., 118., 119., 120., - 1073., 1074., 1075., 1076., 1077., 1078., 1079., 1080., 1081., 1082., 1083., 1084., - 1085., 1086., 1087., 1088., 1089., 1090., 2049., 2050., 2051., 2052., 2053., 2054., - 2055., 2056., 2057., 2058., 2059., 2060., 121., 122., 123., 124., 125., 126., - 127., 128., 129., 130., 131., 132., 133., 134., 135., 136., 137., 138., - 139., 140., 141., 142., 143., 144., 1091., 1092., 1093., 1094., 1095., 1096., - 1097., 1098., 1099., 1100., 1101., 1102., 1103., 1104., 1105., 1106., 1107., 1108., - 2061., 2062., 2063., 2064., 2065., 2066., 2067., 2068., 2069., 2070., 2071., 2072.}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, concat_zero_length_1d_last) -{ - Shape shape_a{4}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{0}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{4}; - - auto r = make_shared(NodeVector{A, B}, 0); - auto f = make_shared(r, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - vector a_data{1, 2, 3, 4}; - vector b_data(0); - - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, a_data); - auto b = backend->create_tensor(element::f32, 
shape_b); - copy_data(b, b_data); - - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, concat_zero_length_1d_middle) -{ - Shape shape_a{4}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{0}; - auto B = make_shared(element::f32, shape_b); - Shape shape_c{4}; - auto C = make_shared(element::f32, shape_c); - Shape shape_r{8}; - - auto r = make_shared(NodeVector{A, B, C}, 0); - auto f = make_shared(r, op::ParameterVector{A, B, C}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - vector a_data{1, 2, 3, 4}; - vector b_data(0); - vector c_data{5, 6, 7, 8}; - - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, a_data); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, b_data); - auto c = backend->create_tensor(element::f32, shape_c); - copy_data(c, c_data); - - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b, c}); - EXPECT_EQ((vector{1, 2, 3, 4, 5, 6, 7, 8}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, concat_zero_length_4d_middle) -{ - Shape shape_a{2, 2, 1, 1}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{2, 2, 0, 1}; - auto B = make_shared(element::f32, shape_b); - Shape shape_c{2, 2, 1, 1}; - auto C = make_shared(element::f32, shape_c); - Shape shape_r{2, 2, 2, 1}; - - auto r = make_shared(NodeVector{A, B, C}, 2); - auto f = make_shared(r, op::ParameterVector{A, B, C}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - vector a_data{1, 2, 3, 4}; - vector b_data(0); - vector c_data{5, 6, 7, 8}; - - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, a_data); - auto b = backend->create_tensor(element::f32, shape_b); - 
copy_data(b, b_data); - auto c = backend->create_tensor(element::f32, shape_c); - copy_data(c, c_data); - - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b, c}); - EXPECT_EQ((vector{1, 5, 2, 6, 3, 7, 4, 8}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, lrn) -{ - Shape shape{2, 3, 2, 1}; - auto A = make_shared(element::f32, shape); - auto lrn = make_shared(A, 1., 2., 1., 3); - auto f = make_shared(lrn, op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - vector args{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, args); - - auto result = backend->create_tensor(element::f32, shape); - backend->call_with_validate(f, {result}, {a}); - - vector expected{0.f, - 0.05325444f, - 0.03402646f, - 0.01869806f, - 0.06805293f, - 0.03287071f, - 0.00509002f, - 0.00356153f, - 0.00174719f, - 0.0012555f, - 0.00322708f, - 0.00235574f}; - EXPECT_TRUE(test::all_close_f(expected, read_vector(result))); -} - -NGRAPH_TEST(${BACKEND_NAME}, select) -{ - Shape shape{2, 2, 2}; - auto A = make_shared(element::boolean, shape); - auto B = make_shared(element::f32, shape); - auto C = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, B, C), op::ParameterVector{A, B, C}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::boolean, shape); - copy_data(a, vector{0, 1, 1, 0, 0, 1, 0, 1}); - auto b = backend->create_tensor(element::f32, shape); - copy_data(b, vector{1, 2, 3, 4, 5, 6, 7, 8}); - auto c = backend->create_tensor(element::f32, shape); - copy_data(c, vector{11, 12, 13, 14, 15, 16, 17, 18}); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a, b, c}); - EXPECT_EQ((vector{11, 2, 3, 14, 15, 6, 17, 8}), 
read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, tensor_constant) -{ - Shape shape{2, 2, 2}; - auto A = op::Constant::create(element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8}); - auto f = make_shared(A, op::ParameterVector{}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {}); - EXPECT_EQ((vector{1, 2, 3, 4, 5, 6, 7, 8}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, tensor_2constant) -{ - Shape shape{2, 2, 2}; - auto A = op::Constant::create(element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8}); - auto f = make_shared(NodeVector{A, A}, op::ParameterVector{}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto result0 = backend->create_tensor(element::f32, shape); - auto result1 = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result0, result1}, {}); - EXPECT_EQ((vector{1, 2, 3, 4, 5, 6, 7, 8}), read_vector(result0)); - EXPECT_EQ((vector{1, 2, 3, 4, 5, 6, 7, 8}), read_vector(result1)); -} - -NGRAPH_TEST(${BACKEND_NAME}, tensor_constant_with_op) -{ - Shape shape{2, 2, 2}; - auto A = op::Constant::create(element::f32, shape, {-1, 2, 3, -4, 5, -6, -7, 8}); - auto f = make_shared(make_shared(A), op::ParameterVector{}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {}); - EXPECT_EQ((vector{1, 2, 3, 4, 5, 6, 7, 8}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, constant_multi_use) -{ - auto A = make_shared(element::i32, Shape{}, std::vector{"388"}); - auto f = make_shared(A, op::ParameterVector{}); - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - std::shared_ptr r1 = backend->create_tensor(element::i32, 
Shape{}); - backend->call_with_validate(f, {r1}, std::vector>{}); - EXPECT_EQ(read_vector(r1), std::vector{388}); - - std::shared_ptr r2 = backend->create_tensor(element::i32, Shape{}); - backend->call_with_validate(f, {r2}, std::vector>{}); - EXPECT_EQ(read_vector(r2), std::vector{388}); -} - -NGRAPH_TEST(${BACKEND_NAME}, function_call) -{ - // First create "f(A,B,C) = (A+B)*C". - Shape shape{2, 2}; - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto C = make_shared(element::f32, shape); - auto f = make_shared((A + B) * C, op::ParameterVector{A, B, C}); - - // Now make "g(X,Y,Z) = f(X,Y,Z) + f(X,Y,Z)" - auto X = make_shared(element::f32, shape); - auto Y = make_shared(element::f32, shape); - auto Z = make_shared(element::f32, shape); - auto g = - make_shared(make_shared(f, NodeVector{X + Y, Y + Z, Z + X}) + - make_shared(f, NodeVector{X, Y, Z}), - op::ParameterVector{X, Y, Z}); - - // Now call g on some test vectors. - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto x = backend->create_tensor(element::f32, shape); - copy_data(x, vector{1, 2, 3, 4}); - auto y = backend->create_tensor(element::f32, shape); - copy_data(y, vector{5, 6, 7, 8}); - auto z = backend->create_tensor(element::f32, shape); - copy_data(z, vector{9, 10, 11, 12}); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(g, {result}, {x, y, z}); - EXPECT_EQ((vector{254, 368, 502, 656}), read_vector(result)); - - backend->call_with_validate(g, {result}, {y, x, z}); - EXPECT_EQ((vector{278, 400, 542, 704}), read_vector(result)); - - backend->call_with_validate(g, {result}, {x, z, y}); - EXPECT_EQ((vector{194, 296, 418, 560}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, convert_int32_float32) -{ - Shape shape{2, 2}; - auto A = make_shared(element::i32, shape); - auto f = - make_shared(make_shared(A, element::f32), op::ParameterVector{A}); - - auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::i32, shape); - copy_data(a, vector{1, 2, 3, 4}); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, convert_uint16_float32) -{ - Shape shape{2, 2}; - auto A = make_shared(element::u16, shape); - auto f = - make_shared(make_shared(A, element::f32), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::u16, shape); - copy_data(a, vector{1, 2, 3, 4}); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, convert_int32_bool) -{ - Shape shape{2, 2}; - auto A = make_shared(element::i32, shape); - auto f = make_shared(make_shared(A, element::boolean), - op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::i32, shape); - copy_data(a, vector{1, 2, 3, 4}); - auto result = backend->create_tensor(element::boolean, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, convert_float32_bool) -{ - Shape shape{2, 2}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, element::boolean), - op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{1, 2, 3, 4}); - auto result = backend->create_tensor(element::boolean, shape); - - 
backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, slice_scalar) -{ - Shape shape_a{}; - auto A = make_shared(element::f32, shape_a); - Shape shape_r{}; - auto r = make_shared(A, Coordinate{}, Coordinate{}); - auto f = make_shared(r, op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{312}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{312}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, slice_matrix) -{ - Shape shape_a{4, 4}; - auto A = make_shared(element::f32, shape_a); - Shape shape_r{3, 2}; - auto r = make_shared(A, Coordinate{0, 1}, Coordinate{3, 3}); - auto f = make_shared(r, op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{2, 3, 6, 7, 10, 11}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, slice_vector) -{ - Shape shape_a{16}; - auto A = make_shared(element::f32, shape_a); - Shape shape_r{12}; - auto r = make_shared(A, Coordinate{2}, Coordinate{14}); - auto f = make_shared(r, op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a}); - 
EXPECT_EQ((vector{2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, slice_matrix_axis_0_overlap) -{ - Shape shape_a{4, 4}; - auto A = make_shared(element::f32, shape_a); - auto B = make_shared(element::f32, shape_a); - auto C = make_shared(A, B); - Shape shape_r{2, 4}; - auto D = make_shared(C, Coordinate{0, 0}, Coordinate{2, 4}); - auto E = make_shared(C, Coordinate{1, 0}, Coordinate{3, 4}); - auto r = make_shared(D, E); - auto f = make_shared(r, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - auto b = backend->create_tensor(element::f32, shape_a); - copy_data(b, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((vector{12, 16, 20, 24, 28, 32, 36, 40}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, slice_matrix_strided) -{ - Shape shape_a{4, 4}; - auto A = make_shared(element::f32, shape_a); - Shape shape_r{2, 2}; - auto r = make_shared(A, Coordinate{1, 0}, Coordinate{4, 4}, Strides{2, 3}); - auto f = make_shared(r, op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{4, 7, 12, 15}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, slice_3d) -{ - Shape shape_a{4, 4, 4}; - auto A = make_shared(element::f32, shape_a); - Shape shape_r{2, 2, 2}; - auto r = make_shared(A, Coordinate{1, 1, 1}, 
Coordinate{3, 3, 3}); - auto f = make_shared(r, op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{21, 22, 25, 26, 37, 38, 41, 42}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, slice_3d_strided) -{ - Shape shape_a{4, 4, 4}; - auto A = make_shared(element::f32, shape_a); - Shape shape_r{2, 2, 2}; - auto r = make_shared(A, Coordinate{0, 0, 0}, Coordinate{4, 4, 4}, Strides{2, 2, 2}); - auto f = make_shared(r, op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{0, 2, 8, 10, 32, 34, 40, 42}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, slice_3d_strided_different_strides) -{ - Shape shape_a{4, 4, 4}; - auto A = make_shared(element::f32, shape_a); - Shape shape_r{2, 2, 2}; - auto r = make_shared(A, Coordinate{0, 0, 0}, Coordinate{4, 4, 4}, Strides{2, 2, 3}); - auto f = make_shared(r, op::ParameterVector{A}); - - auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{0, 3, 8, 11, 32, 35, 40, 43}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, scalar_constant_float32) -{ - auto r = op::Constant::create(element::f32, Shape{}, {4.75}); - auto f = make_shared(r, op::ParameterVector{}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto result = backend->create_tensor(element::f32, Shape{}); - - backend->call_with_validate(f, {result}, {}); - EXPECT_EQ(vector{4.75f}, read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, scalar_constant_int64) -{ - auto r = op::Constant::create(element::i64, Shape{}, {2112}); - auto f = make_shared(r, op::ParameterVector{}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto result = backend->create_tensor(element::i64, Shape{}); - - backend->call_with_validate(f, {result}, {}); - EXPECT_EQ(vector{2112}, read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, tensor_constant_float32) -{ - Shape shape{2, 2}; - auto r = op::Constant::create(element::f32, shape, {4.75, 4.5, -5.25, 0.0}); - auto f = make_shared(r, op::ParameterVector{}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {}); - EXPECT_EQ((vector{4.75f, 4.5f, -5.25f, 0.0f}), 
read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, tensor_constant_int64) -{ - Shape shape{2, 2}; - auto r = op::Constant::create(element::i64, shape, {2112, 1848, 1776, 1964}); - auto f = make_shared(r, op::ParameterVector{}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto result = backend->create_tensor(element::i64, shape); - - backend->call_with_validate(f, {result}, {}); - EXPECT_EQ((vector{2112, 1848, 1776, 1964}), read_vector(result)); -} - -// TODO: Kahan sum only works in limited cases with CPU / Interpreter backend -NGRAPH_TEST(${BACKEND_NAME}, kahan_sum_to_scalar) -{ - Shape shape{2, 2}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - float epsilon = 9.5367431640625e-7f; - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{epsilon, -1.f, 0.f, 1.f}); - auto result = backend->create_tensor(element::f32, Shape{}); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_TRUE(test::all_close_f(vector{epsilon}, read_vector(result))); -} - -// TODO: Kahan sum only works in limited cases with CPU / Interpreter backend -NGRAPH_TEST(${BACKEND_NAME}, kahan_sum_3d_to_vector) -{ - Shape shape_a{3, 3, 3}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3}; - auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - float epsilon_a = 1.220703125e-4f; - float epsilon_b = 3.0517578125e-5f; - float epsilon_c = 7.62939453125e-6f; - copy_data(a, vector{1, 1, 1, 1, 1, 1, epsilon_a, epsilon_b, epsilon_c, - 1, 1, 1, 1, 1, 1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}); - auto result = 
backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_TRUE(test::all_close_f(vector{epsilon_a, epsilon_b, epsilon_c}, - read_vector(result))); -} - -NGRAPH_TEST(${BACKEND_NAME}, constant_equality_bool) -{ - Shape shape{4}; - // auto A = make_shared(element::boolean, shape); - // auto B = make_shared(element::boolean, shape); - // auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); - - auto A = op::Constant::create(element::boolean, shape, {true, false, true, false}); - auto B = op::Constant::create(element::boolean, shape, {true, true, true, true}); - auto f = make_shared(make_shared(A, B), op::ParameterVector{}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto result = backend->create_tensor(element::boolean, shape); - - backend->call_with_validate(f, {result}, {}); - EXPECT_EQ((vector{true, false, true, false}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, replace_slice_scalar) -{ - Shape shape_a{}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{}; - auto r = make_shared(A, B, Coordinate{}, Coordinate{}); - auto f = make_shared(r, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{312}); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{808}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((vector{808}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, replace_slice_matrix_inplace) -{ - Shape shape_a{4, 4}; - auto A = make_shared(element::f32, shape_a); - auto abs_A = make_shared(A); - - Shape shape_b{3, 2}; - auto B = make_shared(element::f32, shape_b); - 
Shape shape_r{4, 4}; - auto r = make_shared(abs_A, B, Coordinate{0, 1}, Coordinate{3, 3}); - auto abs_r = make_shared(r); - auto f = make_shared(abs_r, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{102, 103, 106, 107, 110, 111}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((vector{1, 102, 103, 4, 5, 106, 107, 8, 9, 110, 111, 12, 13, 14, 15, 16}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, replace_slice_matrix) -{ - Shape shape_a{4, 4}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{3, 2}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{4, 4}; - auto r = make_shared(A, B, Coordinate{0, 1}, Coordinate{3, 3}); - auto f = make_shared(r, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{102, 103, 106, 107, 110, 111}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((vector{1, 102, 103, 4, 5, 106, 107, 8, 9, 110, 111, 12, 13, 14, 15, 16}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, replace_slice_vector) -{ - Shape shape_a{16}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{12}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{16}; - auto r = make_shared(A, B, Coordinate{2}, Coordinate{14}); - auto f = make_shared(r, 
op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ( - (vector{0, 1, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 14, 15}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, replace_slice_3d) -{ - Shape shape_a{4, 4, 4}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{2, 2, 2}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{4, 4, 4}; - auto r = make_shared(A, B, Coordinate{1, 1, 1}, Coordinate{3, 3, 3}); - auto f = make_shared(r, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{921, 922, 925, 926, 937, 938, 941, 942}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - - 16, 17, 18, 19, 20, 921, 922, 23, 24, 925, 926, 27, 28, 29, 30, 31, - - 32, 33, 34, 35, 36, 937, 938, 39, 40, 941, 942, 43, 44, 45, 46, 47, - - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), - 
read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, replace_slice_3d_strided) -{ - Shape shape_a{4, 4, 4}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{2, 2, 2}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{4, 4, 4}; - auto r = make_shared( - A, B, Coordinate{0, 0, 0}, Coordinate{4, 4, 4}, Strides{2, 2, 2}); - auto f = make_shared(r, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{900, 902, 908, 910, 932, 934, 940, 942}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((vector{900, 1, 902, 3, 4, 5, 6, 7, 908, 9, 910, 11, 12, 13, 14, 15, - - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - - 932, 33, 934, 35, 36, 37, 38, 39, 940, 41, 942, 43, 44, 45, 46, 47, - - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, replace_slice_3d_strided_different_strides) -{ - Shape shape_a{4, 4, 4}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{2, 2, 2}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{4, 4, 4}; - auto r = make_shared( - A, B, Coordinate{0, 0, 0}, Coordinate{4, 4, 4}, Strides{2, 2, 3}); - auto f = make_shared(r, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{900, 903, 908, 911, 932, 935, 940, 943}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((vector{900, 1, 2, 903, 4, 5, 6, 7, 908, 9, 10, 911, 12, 13, 14, 15, - - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - - 932, 33, 34, 935, 36, 37, 38, 39, 940, 41, 42, 943, 44, 45, 46, 47, - - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_0d) -{ - Shape shape{}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{6}); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{6}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_1d_nochange) -{ - Shape shape{8}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7}); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{0, 1, 2, 3, 4, 5, 6, 7}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_1d_0) -{ - Shape 
shape{8}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{0, 1, 2, 3, 4, 5, 6, 7}); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{7, 6, 5, 4, 3, 2, 1, 0}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_2d_nochange) -{ - Shape shape{4, 3}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, - test::NDArray({{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}).get_vector()); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ( - (test::NDArray({{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}).get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_2d_0) -{ - Shape shape{4, 3}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, - test::NDArray({{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}).get_vector()); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ( - (test::NDArray({{9, 10, 11}, {6, 7, 8}, {3, 4, 5}, {0, 1, 2}}).get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_2d_1) -{ - Shape shape{4, 3}; - auto A = 
make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, - test::NDArray({{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}).get_vector()); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ( - (test::NDArray({{2, 1, 0}, {5, 4, 3}, {8, 7, 6}, {11, 10, 9}}).get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_2d_01) -{ - Shape shape{4, 3}; - auto A = make_shared(element::f32, shape); - auto f = - make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, - test::NDArray({{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}).get_vector()); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ( - (test::NDArray({{11, 10, 9}, {8, 7, 6}, {5, 4, 3}, {2, 1, 0}}).get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_nochange) -{ - Shape shape{2, 4, 3}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, - test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, - {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) - .get_vector()); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, - 
{{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) - .get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_0) -{ - Shape shape{2, 4, 3}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, - test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, - {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) - .get_vector()); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((test::NDArray({{{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}, - {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}}) - .get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_1) -{ - Shape shape{2, 4, 3}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, - test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, - {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) - .get_vector()); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((test::NDArray({{{9, 10, 11}, {6, 7, 8}, {3, 4, 5}, {0, 1, 2}}, - {{21, 22, 23}, {18, 19, 20}, {15, 16, 17}, {12, 13, 14}}}) - .get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_2) -{ - Shape shape{2, 4, 3}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{2}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create 
some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, - test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, - {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) - .get_vector()); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((test::NDArray({{{2, 1, 0}, {5, 4, 3}, {8, 7, 6}, {11, 10, 9}}, - {{14, 13, 12}, {17, 16, 15}, {20, 19, 18}, {23, 22, 21}}}) - .get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_01) -{ - Shape shape{2, 4, 3}; - auto A = make_shared(element::f32, shape); - auto f = - make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, - test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, - {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) - .get_vector()); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((test::NDArray({{{21, 22, 23}, {18, 19, 20}, {15, 16, 17}, {12, 13, 14}}, - {{9, 10, 11}, {6, 7, 8}, {3, 4, 5}, {0, 1, 2}}}) - .get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_02) -{ - Shape shape{2, 4, 3}; - auto A = make_shared(element::f32, shape); - auto f = - make_shared(make_shared(A, AxisSet{0, 2}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, - test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, - {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) - .get_vector()); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - 
EXPECT_EQ((test::NDArray({{{14, 13, 12}, {17, 16, 15}, {20, 19, 18}, {23, 22, 21}}, - {{2, 1, 0}, {5, 4, 3}, {8, 7, 6}, {11, 10, 9}}}) - .get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_12) -{ - Shape shape{2, 4, 3}; - auto A = make_shared(element::f32, shape); - auto f = - make_shared(make_shared(A, AxisSet{1, 2}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, - test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, - {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) - .get_vector()); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((test::NDArray({{{11, 10, 9}, {8, 7, 6}, {5, 4, 3}, {2, 1, 0}}, - {{23, 22, 21}, {20, 19, 18}, {17, 16, 15}, {14, 13, 12}}}) - .get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_3d_012) -{ - Shape shape{2, 4, 3}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{0, 1, 2}), - op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, - test::NDArray({{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}}, - {{12, 13, 14}, {15, 16, 17}, {18, 19, 20}, {21, 22, 23}}}) - .get_vector()); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((test::NDArray({{{23, 22, 21}, {20, 19, 18}, {17, 16, 15}, {14, 13, 12}}, - {{11, 10, 9}, {8, 7, 6}, {5, 4, 3}, {2, 1, 0}}}) - .get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, numeric_float_nan) -{ - Shape shape{5}; - auto A = op::Constant::create(element::f32, shape, {-2.5f, 25.5f, 2.25f, NAN, 6.0f}); - auto B = 
op::Constant::create(element::f32, shape, {10.0f, 5.0f, 2.25f, 10.0f, NAN}); - auto f = make_shared(make_shared(A, B), op::ParameterVector{}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto result = backend->create_tensor(element::boolean, shape); - backend->call_with_validate(f, {result}, {}); - EXPECT_EQ((vector{false, false, true, false, false}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, numeric_double_nan) -{ - Shape shape{5}; - auto A = op::Constant::create(element::f64, shape, {-2.5f, 25.5f, 2.25f, NAN, 6.0f}); - auto B = op::Constant::create(element::f64, shape, {10.0f, 5.0f, 2.25f, 10.0f, NAN}); - auto f = make_shared(make_shared(A, B), op::ParameterVector{}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto result = backend->create_tensor(element::boolean, shape); - backend->call_with_validate(f, {result}, {}); - EXPECT_EQ((vector{false, false, true, false, false}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, numeric_float_inf) -{ - Shape shape{5}; - auto A = op::Constant::create(element::f32, shape, {-2.5f, 25.5f, 2.25f, INFINITY, 6.0f}); - auto B = op::Constant::create(element::f32, shape, {10.0f, 5.0f, 2.25f, 10.0f, -INFINITY}); - auto f = make_shared(make_shared(A, B), op::ParameterVector{}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto result = backend->create_tensor(element::boolean, shape); - backend->call_with_validate(f, {result}, {}); - EXPECT_EQ((vector{false, false, true, false, false}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, numeric_double_inf) -{ - Shape shape{5}; - auto A = op::Constant::create(element::f64, shape, {-2.5f, 25.5f, 2.25f, INFINITY, 6.0f}); - auto B = op::Constant::create(element::f64, shape, {10.0f, 5.0f, 2.25f, 10.0f, -INFINITY}); - auto f = make_shared(make_shared(A, B), 
op::ParameterVector{}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto result = backend->create_tensor(element::boolean, shape); - backend->call_with_validate(f, {result}, {}); - EXPECT_EQ((vector{false, false, true, false, false}), read_vector(result)); -} - -// -// From the XLA docs: https://www.tensorflow.org/performance/xla/operation_semantics#selectandscatter -// -NGRAPH_TEST(${BACKEND_NAME}, select_and_scatter_with_overlap) -{ - Shape shape_sel_a{}; - auto SEL_A = make_shared(element::f32, shape_sel_a); - Shape shape_sel_b{}; - auto SEL_B = make_shared(element::f32, shape_sel_b); - auto sel_f = make_shared(make_shared(SEL_A, SEL_B), - op::ParameterVector{SEL_A, SEL_B}); - - Shape shape_scatter_a{}; - auto SCATTER_A = make_shared(element::f32, shape_scatter_a); - Shape shape_scatter_b{}; - auto SCATTER_B = make_shared(element::f32, shape_scatter_b); - auto scatter_f = - make_shared(SCATTER_A + SCATTER_B, op::ParameterVector{SCATTER_A, SCATTER_B}); - - Shape shape_a{4, 5}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{2, 2}; - auto B = make_shared(element::f32, shape_b); - Shape shape_c{}; - auto C = make_shared(element::f32, shape_c); - Shape shape_r{4, 5}; - Shape window_shape{2, 3}; - auto window_strides = Strides{2, 2}; - auto f = make_shared( - make_shared(A, B, C, sel_f, scatter_f, window_shape, window_strides), - op::ParameterVector{A, B, C}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, - test::NDArray( - {{7, 2, 5, 3, 8}, {3, 8, 9, 3, 4}, {1, 5, 7, 5, 6}, {0, 6, 2, 10, 2}}) - .get_vector()); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, test::NDArray({{2, 6}, {3, 1}}).get_vector()); - auto c = backend->create_tensor(element::f32, shape_c); - copy_data(c, vector{0}); - auto result = 
backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b, c}); - EXPECT_EQ((test::NDArray( - {{0, 0, 0, 0, 0}, {0, 0, 8, 0, 0}, {0, 0, 3, 0, 0}, {0, 0, 0, 1, 0}}) - .get_vector()), - read_vector(result)); -} - -// -// From the XLA docs: https://www.tensorflow.org/performance/xla/operation_semantics#selectandscatter -// -NGRAPH_TEST(${BACKEND_NAME}, select_and_scatter_without_overlap) -{ - Shape shape_sel_a{}; - auto SEL_A = make_shared(element::f32, shape_sel_a); - Shape shape_sel_b{}; - auto SEL_B = make_shared(element::f32, shape_sel_b); - auto sel_f = make_shared(make_shared(SEL_A, SEL_B), - op::ParameterVector{SEL_A, SEL_B}); - - Shape shape_scatter_a{}; - auto SCATTER_A = make_shared(element::f32, shape_scatter_a); - Shape shape_scatter_b{}; - auto SCATTER_B = make_shared(element::f32, shape_scatter_b); - auto scatter_f = - make_shared(SCATTER_A + SCATTER_B, op::ParameterVector{SCATTER_A, SCATTER_B}); - - Shape shape_a{4, 6}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{2, 2}; - auto B = make_shared(element::f32, shape_b); - Shape shape_c{}; - auto C = make_shared(element::f32, shape_c); - Shape shape_r{4, 6}; - Shape window_shape{2, 3}; - auto window_strides = Strides{2, 3}; - auto f = make_shared( - make_shared(A, B, C, sel_f, scatter_f, window_shape, window_strides), - op::ParameterVector{A, B, C}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, - test::NDArray( - {{7, 2, 5, 3, 10, 2}, {3, 8, 9, 3, 4, 2}, {1, 5, 7, 5, 6, 1}, {0, 6, 2, 7, 2, 8}}) - .get_vector()); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, test::NDArray({{2, 6}, {3, 1}}).get_vector()); - auto c = backend->create_tensor(element::f32, shape_c); - copy_data(c, vector{0}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, 
{result}, {a, b, c}); - EXPECT_EQ((test::NDArray( - {{0, 0, 0, 0, 6, 0}, {0, 0, 2, 0, 0, 0}, {0, 0, 3, 0, 0, 0}, {0, 0, 0, 0, 0, 1}}) - .get_vector()), - read_vector(result)); -} - -// -// Adapted from the XLA docs to provide an example in >2D: https://www.tensorflow.org/performance/xla/operation_semantics#selectandscatter -// -NGRAPH_TEST(${BACKEND_NAME}, select_and_scatter_3d_without_overlap) -{ - Shape shape_sel_a{}; - auto SEL_A = make_shared(element::f32, shape_sel_a); - Shape shape_sel_b{}; - auto SEL_B = make_shared(element::f32, shape_sel_b); - auto sel_f = make_shared(make_shared(SEL_A, SEL_B), - op::ParameterVector{SEL_A, SEL_B}); - - Shape shape_scatter_a{}; - auto SCATTER_A = make_shared(element::f32, shape_scatter_a); - Shape shape_scatter_b{}; - auto SCATTER_B = make_shared(element::f32, shape_scatter_b); - auto scatter_f = - make_shared(SCATTER_A + SCATTER_B, op::ParameterVector{SCATTER_A, SCATTER_B}); - - Shape shape_a{2, 4, 6}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{1, 2, 2}; - auto B = make_shared(element::f32, shape_b); - Shape shape_c{}; - auto C = make_shared(element::f32, shape_c); - Shape shape_r{2, 4, 6}; - Shape window_shape{2, 2, 3}; - auto window_strides = Strides{2, 2, 3}; - auto f = make_shared( - make_shared(A, B, C, sel_f, scatter_f, window_shape, window_strides), - op::ParameterVector{A, B, C}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data( - a, - test::NDArray( - {{{7, 2, 5, 3, 10, 2}, {3, 8, 9, 3, 4, 2}, {1, 5, 7, 5, 6, 1}, {0, 6, 2, 7, 2, 8}}, - {{2, 5, 8, 3, 4, 2}, {1, 2, 8, 4, 5, 2}, {10, 2, 3, 4, 1, 0}, {4, 1, 2, 4, 5, 7}}}) - .get_vector()); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, test::NDArray({{{2, 6}, {3, 1}}}).get_vector()); - auto c = backend->create_tensor(element::f32, shape_c); - copy_data(c, vector{0}); - auto result = 
backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b, c}); - EXPECT_EQ( - (test::NDArray( - {{{0, 0, 0, 0, 6, 0}, {0, 0, 2, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 1}}, - {{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {3, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}}}) - .get_vector()), - read_vector(result)); -} - -template -void make_unary_empty_test(const string& backend_name) -{ - Shape shape{0}; - - op::ParameterVector params; - NodeVector result_list; - for (size_t i = 0; i < s_known_element_types.size(); i++) - { - shared_ptr p = make_shared(s_known_element_types[i], shape); - params.push_back(p); - result_list.push_back(make_shared(p)); - } - - auto f = make_shared(result_list, params); - auto backend = runtime::Backend::create(backend_name); - - vector> inputs; - vector> outputs; - for (size_t i = 0; i < s_known_element_types.size(); i++) - { - inputs.push_back(backend->create_tensor(s_known_element_types[i], shape)); - outputs.push_back(backend->create_tensor(s_known_element_types[i], shape)); - } - - backend->call_with_validate(f, outputs, inputs); - - EXPECT_EQ(read_vector(inputs[0]).size(), 0); - EXPECT_EQ(read_vector(inputs[1]).size(), 0); - EXPECT_EQ(read_vector(inputs[2]).size(), 0); - EXPECT_EQ(read_vector(inputs[3]).size(), 0); - EXPECT_EQ(read_vector(inputs[4]).size(), 0); - EXPECT_EQ(read_vector(inputs[5]).size(), 0); - EXPECT_EQ(read_vector(inputs[6]).size(), 0); - EXPECT_EQ(read_vector(inputs[7]).size(), 0); - EXPECT_EQ(read_vector(inputs[8]).size(), 0); - EXPECT_EQ(read_vector(inputs[9]).size(), 0); - - EXPECT_EQ(read_vector(outputs[0]).size(), 0); - EXPECT_EQ(read_vector(outputs[1]).size(), 0); - EXPECT_EQ(read_vector(outputs[2]).size(), 0); - EXPECT_EQ(read_vector(outputs[3]).size(), 0); - EXPECT_EQ(read_vector(outputs[4]).size(), 0); - EXPECT_EQ(read_vector(outputs[5]).size(), 0); - EXPECT_EQ(read_vector(outputs[6]).size(), 0); - EXPECT_EQ(read_vector(outputs[7]).size(), 0); - 
EXPECT_EQ(read_vector(outputs[8]).size(), 0); - EXPECT_EQ(read_vector(outputs[9]).size(), 0); -} - -template -void make_binary_empty_test(const string& backend_name, bool is_comparison = false) -{ - Shape shape{0}; - op::ParameterVector A; - for (size_t i = 0; i < s_known_element_types.size(); i++) - { - A.push_back(make_shared(s_known_element_types[i], shape)); - } - - NodeVector result_list; - for (shared_ptr p : A) - { - result_list.push_back(make_shared(p, p)); - } - - auto f = make_shared(result_list, A); - auto backend = runtime::Backend::create(backend_name); - - vector> inputs; - vector> outputs; - for (size_t i = 0; i < s_known_element_types.size(); i++) - { - inputs.push_back(backend->create_tensor(s_known_element_types[i], shape)); - if (is_comparison) - { - outputs.push_back(backend->create_tensor(element::from(), shape)); - } - else - { - outputs.push_back(backend->create_tensor(s_known_element_types[i], shape)); - } - } - - backend->call_with_validate(f, outputs, inputs); - - EXPECT_EQ(read_vector(inputs[0]).size(), 0); - EXPECT_EQ(read_vector(inputs[1]).size(), 0); - EXPECT_EQ(read_vector(inputs[2]).size(), 0); - EXPECT_EQ(read_vector(inputs[3]).size(), 0); - EXPECT_EQ(read_vector(inputs[4]).size(), 0); - EXPECT_EQ(read_vector(inputs[5]).size(), 0); - EXPECT_EQ(read_vector(inputs[6]).size(), 0); - EXPECT_EQ(read_vector(inputs[7]).size(), 0); - EXPECT_EQ(read_vector(inputs[8]).size(), 0); - EXPECT_EQ(read_vector(inputs[9]).size(), 0); - - if (is_comparison) - { - EXPECT_EQ(read_vector(outputs[0]).size(), 0); - EXPECT_EQ(read_vector(outputs[1]).size(), 0); - EXPECT_EQ(read_vector(outputs[2]).size(), 0); - EXPECT_EQ(read_vector(outputs[3]).size(), 0); - EXPECT_EQ(read_vector(outputs[4]).size(), 0); - EXPECT_EQ(read_vector(outputs[5]).size(), 0); - EXPECT_EQ(read_vector(outputs[6]).size(), 0); - EXPECT_EQ(read_vector(outputs[7]).size(), 0); - EXPECT_EQ(read_vector(outputs[8]).size(), 0); - EXPECT_EQ(read_vector(outputs[9]).size(), 0); - } - else - { - 
EXPECT_EQ(read_vector(outputs[0]).size(), 0); - EXPECT_EQ(read_vector(outputs[1]).size(), 0); - EXPECT_EQ(read_vector(outputs[2]).size(), 0); - EXPECT_EQ(read_vector(outputs[3]).size(), 0); - EXPECT_EQ(read_vector(outputs[4]).size(), 0); - EXPECT_EQ(read_vector(outputs[5]).size(), 0); - EXPECT_EQ(read_vector(outputs[6]).size(), 0); - EXPECT_EQ(read_vector(outputs[7]).size(), 0); - EXPECT_EQ(read_vector(outputs[8]).size(), 0); - EXPECT_EQ(read_vector(outputs[9]).size(), 0); - } -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_abs) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_ceiling) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_exp) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_floor) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_log) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_negative) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_not) -{ - Shape shape{0}; - auto A = make_shared(element::from(), shape); - auto f = make_shared(make_shared(A), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::from(), shape); - auto result = backend->create_tensor(element::from(), shape); - - backend->call_with_validate(f, {result}, {a}); - - auto in_vec = read_vector(a); - auto out_vec = read_vector(result); - - EXPECT_EQ(in_vec.size(), 0); - EXPECT_EQ(out_vec.size(), 0); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_sign) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_sqrt) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_sin) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_sinh) -{ - 
make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_cos) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_cosh) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_tan) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_tanh) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_asin) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_acos) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_atan) -{ - make_unary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_add) -{ - make_binary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_divide) -{ - make_binary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_eq) -{ - make_binary_empty_test("${BACKEND_NAME}", true); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_greater) -{ - make_binary_empty_test("${BACKEND_NAME}", true); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_greatereq) -{ - make_binary_empty_test("${BACKEND_NAME}", true); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_less) -{ - make_binary_empty_test("${BACKEND_NAME}", true); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_lesseq) -{ - make_binary_empty_test("${BACKEND_NAME}", true); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_maximum) -{ - make_binary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_minimum) -{ - make_binary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_multiply) -{ - make_binary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_not_equal) -{ - make_binary_empty_test("${BACKEND_NAME}", true); -} - -NGRAPH_TEST(${BACKEND_NAME}, zero_sized_power) -{ - make_binary_empty_test("${BACKEND_NAME}"); -} - 
-NGRAPH_TEST(${BACKEND_NAME}, zero_sized_subtract) -{ - make_binary_empty_test("${BACKEND_NAME}"); -} - -NGRAPH_TEST(${BACKEND_NAME}, convolution_outlining) -{ - Shape shape_a{1, 2, 2, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{2, 2, 1, 1}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{1, 2, 2, 2}; - auto conv1 = make_shared(A, - B, - Strides{1, 1}, - Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, - Strides{1, 1}); - auto conv2 = make_shared(conv1, - B, - Strides{1, 1}, - Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, - Strides{1, 1}); - auto f = make_shared(conv2, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{1.0f, 1.0f, 1.0f, 1.0f}); - auto result = backend->create_tensor(element::f32, shape_r); - - vector expected_result{4.0f, 4.0f, 4.0f, 4.0f, 4.0f, 4.0f, 4.0f, 4.0f}; - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ(vector{expected_result}, read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, computation_reuse) -{ - Shape shape_a{1, 16, 2, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{32, 16, 1, 1}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{1, 32, 2, 2}; - auto conv = make_shared(A, - B, - Strides{1, 1}, - Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, - Strides{1, 1}); - Shape pool_shape{1, 1}; - auto pool = make_shared(conv, pool_shape); - auto bias = make_shared( - op::Constant::create(element::f32, Shape{}, {2.14}), shape_r, AxisSet{0, 1, 2, 3}); - auto result_op = make_shared(pool + bias); - auto f = make_shared(ResultVector{result_op}, op::ParameterVector{A, B}); - - auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); - - vector input(64, 1.0f); - vector weights(512, 0.5f); - vector rv(128); - - auto a = backend->create_tensor(element::f32, shape_a, input.data()); - auto b = backend->create_tensor(element::f32, shape_b, weights.data()); - auto result = backend->create_tensor(element::f32, shape_r, rv.data()); - - backend->call_with_validate(f, {result}, {a, b}); - - vector rv_saved(rv); - - b->set_stale(false); - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ(rv_saved, rv); -} - -NGRAPH_TEST(${BACKEND_NAME}, pad_interior_1d) -{ - Shape shape_a{6}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{16}; - Shape padding_below{0}; - Shape padding_above{0}; - Shape padding_interior{2}; - auto f = make_shared( - make_shared(A, B, padding_below, padding_above, padding_interior), - op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, test::NDArray({1, 2, 3, 4, 5, 6}).get_vector()); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{2112}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((test::NDArray( - {1, 2112, 2112, 2, 2112, 2112, 3, 2112, 2112, 4, 2112, 2112, 5, 2112, 2112, 6}) - .get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, pad_exterior_1d) -{ - Shape shape_a{6}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{15}; - Shape padding_below{4}; - Shape padding_above{5}; - Shape padding_interior{0}; - auto f = make_shared( - make_shared(A, B, padding_below, padding_above, padding_interior), - op::ParameterVector{A, B}); - - auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, test::NDArray({1, 2, 3, 4, 5, 6}).get_vector()); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{2112}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((test::NDArray( - {2112, 2112, 2112, 2112, 1, 2, 3, 4, 5, 6, 2112, 2112, 2112, 2112, 2112}) - .get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, pad_interior_exterior_1d) -{ - Shape shape_a{6}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{25}; - Shape padding_below{4}; - Shape padding_above{5}; - Shape padding_interior{2}; - auto f = make_shared( - make_shared(A, B, padding_below, padding_above, padding_interior), - op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, test::NDArray({1, 2, 3, 4, 5, 6}).get_vector()); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{2112}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((test::NDArray({2112, 2112, 2112, 2112, 1, 2112, 2112, 2, 2112, - 2112, 3, 2112, 2112, 4, 2112, 2112, 5, 2112, - 2112, 6, 2112, 2112, 2112, 2112, 2112}) - .get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, pad_interior_exterior_2d) -{ - Shape shape_a{2, 3}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{7, 6}; - Shape padding_below{1, 0}; - Shape padding_above{2, 1}; - Shape padding_interior{2, 1}; - auto f = make_shared( - make_shared(A, B, padding_below, 
padding_above, padding_interior), - op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, test::NDArray({{1, 2, 3}, {4, 5, 6}}).get_vector()); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{9}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((test::NDArray({{9, 9, 9, 9, 9, 9}, - {1, 9, 2, 9, 3, 9}, - {9, 9, 9, 9, 9, 9}, - {9, 9, 9, 9, 9, 9}, - {4, 9, 5, 9, 6, 9}, - {9, 9, 9, 9, 9, 9}, - {9, 9, 9, 9, 9, 9}}) - .get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, pad_exterior_2d_0x0) -{ - Shape shape_a{0, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{5, 5}; - Shape padding_below{2, 3}; - Shape padding_above{3, 2}; - Shape padding_interior{0, 0}; - auto f = make_shared( - make_shared(A, B, padding_below, padding_above, padding_interior), - op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - // copy_data(a, test::NDArray({{}}).get_vector()); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{2112}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((test::NDArray({{2112, 2112, 2112, 2112, 2112}, - {2112, 2112, 2112, 2112, 2112}, - {2112, 2112, 2112, 2112, 2112}, - {2112, 2112, 2112, 2112, 2112}, - {2112, 2112, 2112, 2112, 2112}}) - .get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, pad_exterior_2d_0x3) -{ - Shape shape_a{0, 3}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{}; - auto B = 
make_shared(element::f32, shape_b); - Shape shape_r{5, 5}; - Shape padding_below{2, 1}; - Shape padding_above{3, 1}; - Shape padding_interior{0, 0}; - auto f = make_shared( - make_shared(A, B, padding_below, padding_above, padding_interior), - op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - // copy_data(a, test::NDArray({}).get_vector()); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{2112}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((test::NDArray({{2112, 2112, 2112, 2112, 2112}, - {2112, 2112, 2112, 2112, 2112}, - {2112, 2112, 2112, 2112, 2112}, - {2112, 2112, 2112, 2112, 2112}, - {2112, 2112, 2112, 2112, 2112}}) - .get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, pad_exterior_2d_3x0) -{ - Shape shape_a{3, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{5, 5}; - Shape padding_below{1, 3}; - Shape padding_above{1, 2}; - Shape padding_interior{0, 0}; - auto f = make_shared( - make_shared(A, B, padding_below, padding_above, padding_interior), - op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - // copy_data(a, test::NDArray({}).get_vector()); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{2112}); - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((test::NDArray({{2112, 2112, 2112, 2112, 2112}, - {2112, 2112, 2112, 2112, 2112}, - {2112, 2112, 2112, 2112, 2112}, - {2112, 2112, 2112, 2112, 2112}, - {2112, 2112, 2112, 2112, 2112}}) - 
.get_vector()), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, pad_exterior_4d_1x2x2x2) -{ - Shape shape_a{1, 2, 2, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{1, 2, 4, 4}; - Shape padding_below{0, 0, 1, 1}; - Shape padding_above{0, 0, 1, 1}; - Shape padding_interior{0, 0, 0, 0}; - auto f = make_shared( - make_shared(A, B, padding_below, padding_above, padding_interior), - op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - // clang-format off - copy_data(a, test::NDArray( - { - { - { - {0.0f, 0.0f}, - {0.0f, 0.0f} - }, - { - {0.0f, 0.0f}, - {0.0f, 0.0f} - } - } - }).get_vector()); - // clang-format on - - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{42}); - - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - // clang-format off - EXPECT_EQ((test::NDArray( - { - { - { - {42.0f, 42.0f, 42.0f, 42.0f}, - {42.0f, 0.0f, 0.0f, 42.0f}, - {42.0f, 0.0f, 0.0f, 42.0f}, - {42.0f, 42.0f, 42.0f, 42.0f} - }, - { - {42.0f, 42.0f, 42.0f, 42.0f}, - {42.0f, 0.0f, 0.0f, 42.0f}, - {42.0f, 0.0f, 0.0f, 42.0f}, - {42.0f, 42.0f, 42.0f, 42.0f} - } - } - }).get_vector()), - read_vector(result)); - // clang-format on -} - -// This is a regression test for one of TF's unit tests, which was failing. -// The problem was inappropriate handling of the shape computation for a -// zero-length axis with interior padding. Rather than subtract 1 from the -// source shape and multiply by the interior padding (which causes underflow), -// we should just count the pre-interior-padding length as zero. 
-NGRAPH_TEST(${BACKEND_NAME}, pad_interior_exterior_4d_2x0x3x2) -{ - Shape shape_a{2, 0, 3, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{}; - auto B = make_shared(element::f32, shape_b); - Shape padding_below{1, 0, 0, 0}; - Shape padding_above{0, 2, 0, 0}; - Shape padding_interior{2, 1, 0, 0}; - Shape shape_r{5, 2, 3, 2}; - auto f = make_shared( - make_shared(A, B, padding_below, padding_above, padding_interior), - op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - // copy_data(a, test::NDArray({}).get_vector()); - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{2112}); - auto result = backend->create_tensor(element::f32, shape_r); - - vector expected(5 * 2 * 3 * 2, 2112); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ(expected, read_vector(result)); -} - -// This test covers the case with multiple image and with asymetric pad -// bug has been found on nvGPU side now covered by this test -NGRAPH_TEST(${BACKEND_NAME}, pad_2channel_2image_asym) -{ - Shape shape_a{2, 2, 4, 4}; - auto window_movement_strides = Strides{2, 2}; - Shape padding_below{0, 0, 0, 0}; - Shape padding_above{0, 0, 2, 2}; - Shape padding_interior{0, 0, 0, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{2, 2, 6, 6}; - auto f = make_shared( - make_shared(A, B, padding_below, padding_above, padding_interior), - op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, - test::NDArray({{{{0, 1, 0, 2}, // img 0 chan 0 - {0, 3, 2, 0}, - {2, 0, 0, 0}, - {0, 2, 1, 0}}, - - {{0, 0, 0, 2}, // img 0 chan 1 - {0, 2, 3, 0}, - {2, 0, 1, 0}, - {2, 0, 0, 0}}}, - - {{{0, 
2, 1, 1}, // img 1 chan 0 - {0, 0, 2, 0}, - {0, 0, 1, 2}, - {0, 0, 0, 0}}, - - {{2, 1, 0, 0}, // img 1 chan 1 - {0, 2, 0, 0}, - {1, 1, 2, 0}, - {1, 0, 0, 0}}}}) - .get_vector()); - - auto b = backend->create_tensor(element::f32, shape_b); - copy_data(b, vector{42}); - - auto result = backend->create_tensor(element::f32, shape_r); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((test::NDArray({{{{0, 1, 0, 2, 42, 42}, // img 0 chan 0 - {0, 3, 2, 0, 42, 42}, - {2, 0, 0, 0, 42, 42}, - {0, 2, 1, 0, 42, 42}, - {42, 42, 42, 42, 42, 42}, - {42, 42, 42, 42, 42, 42}}, - - {{0, 0, 0, 2, 42, 42}, // img 1 chan 0 - {0, 2, 3, 0, 42, 42}, - {2, 0, 1, 0, 42, 42}, - {2, 0, 0, 0, 42, 42}, - {42, 42, 42, 42, 42, 42}, - {42, 42, 42, 42, 42, 42}}}, - - {{{0, 2, 1, 1, 42, 42}, // img 1 chan 0 - {0, 0, 2, 0, 42, 42}, - {0, 0, 1, 2, 42, 42}, - {0, 0, 0, 0, 42, 42}, - {42, 42, 42, 42, 42, 42}, - {42, 42, 42, 42, 42, 42}}, - - {{2, 1, 0, 0, 42, 42}, // img 1 chan 1 - {0, 2, 0, 0, 42, 42}, - {1, 1, 2, 0, 42, 42}, - {1, 0, 0, 0, 42, 42}, - {42, 42, 42, 42, 42, 42}, - {42, 42, 42, 42, 42, 42}}}}) - .get_vector()), - read_vector(result)); -} - -// Trivial case with no reduced axes. 
-NGRAPH_TEST(${BACKEND_NAME}, product_trivial) -{ - Shape shape{2, 2}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{1, 2, 3, 4}); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); -} - -// Failure has been reported at 5D for some reason -NGRAPH_TEST(${BACKEND_NAME}, product_trivial_5d) -{ - Shape shape{2, 2, 2, 2, 2}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, product_to_scalar) -{ - Shape shape{2, 2}; - auto A = make_shared(element::f32, shape); - auto f = - make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{1, 2, 3, 4}); - auto result = backend->create_tensor(element::f32, Shape{}); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{24}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure 
reduction doesn't clobber the - // input tensors, so let's do this too. - EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, product_matrix_columns) -{ - Shape shape_a{3, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{2}; - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{15, 48}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. - EXPECT_EQ((vector{1, 2, 3, 4, 5, 6}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, product_matrix_rows) -{ - Shape shape_a{3, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3}; - auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{2, 12, 30}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. 
- EXPECT_EQ((vector{1, 2, 3, 4, 5, 6}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, product_matrix_rows_zero) -{ - Shape shape_a{3, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3}; - auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3, 3, 3})); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 1, 1}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. - EXPECT_EQ((vector{}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, product_matrix_cols_zero) -{ - // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={})). - Shape shape_a{0, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{2}; - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3, 3})); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 1}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. 
- EXPECT_EQ((vector{}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, product_vector_zero) -{ - Shape shape_a{0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{}; - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3})); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. - EXPECT_EQ((vector{}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, product_matrix_to_scalar_zero_by_zero) -{ - Shape shape_a{0, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{}; - auto f = - make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3})); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. 
- EXPECT_EQ((vector{}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, product_3d_to_matrix_most_sig) -{ - Shape shape_a{3, 3, 3}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3, 3}; - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1 * 10 * 19, - 2 * 11 * 20, - 3 * 12 * 21, - 4 * 13 * 22, - 5 * 14 * 23, - 6 * 15 * 24, - 7 * 16 * 25, - 8 * 17 * 26, - 9 * 18 * 27}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, product_3d_to_matrix_least_sig) -{ - Shape shape_a{3, 3, 3}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3, 3}; - auto f = make_shared(make_shared(A, AxisSet{2}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1 * 2 * 3, - 4 * 5 * 6, - 7 * 8 * 9, - 10 * 11 * 12, - 13 * 14 * 15, - 16 * 17 * 18, - 19 * 20 * 21, - 22 * 23 * 24, - 25 * 26 * 27}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, product_3d_to_vector) -{ - Shape shape_a{3, 3, 3}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3}; - auto f = - make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - 
// Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1.0f * 10.0f * 19.0f * 4.0f * 13.0f * 22.0f * 7.0f * 16.0f * 25.0f, - 2.0f * 11.0f * 20.0f * 5.0f * 14.0f * 23.0f * 8.0f * 17.0f * 26.0f, - 3.0f * 12.0f * 21.0f * 6.0f * 15.0f * 24.0f * 9.0f * 18.0f * 27.0f}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, product_3d_to_scalar) -{ - Shape shape_a{3, 3, 3}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{}; - auto f = make_shared(make_shared(A, AxisSet{0, 1, 2}), - op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_TRUE(test::all_close(vector{1.0f * 10.0f * 9.0f * 4.0f * 13.0f * 6.0f * 7.0f * - 12.0f * 3.0f * 2.0f * 11.0f * 8.0f * 5.0f * 14.0f * - 5.0f * 8.0f * 11.0f * 2.0f * 3.0f * 12.0f * 7.0f * - 6.0f * 13.0f * 4.0f * 9.0f * 10.0f * 1.0f}, - read_vector(result))); -} - -NGRAPH_TEST(${BACKEND_NAME}, product_3d_eliminate_zero_dim) -{ - Shape shape_a{3, 0, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3, 2}; - auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - - // 
Overwrite the initial result vector to make sure we're not just coincidentally getting the right value. - copy_data(result, vector{2112, 2112, 2112, 2112, 2112, 2112}); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 1, 1, 1, 1, 1}), read_vector(result)); -} - -// Trivial case with no reduced axes. -NGRAPH_TEST(${BACKEND_NAME}, max_trivial) -{ - Shape shape{2, 2}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{1, 2, 3, 4}); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); -} - -// Failure has been reported at 5D for some reason -NGRAPH_TEST(${BACKEND_NAME}, max_trivial_5d) -{ - Shape shape{2, 2, 2, 2, 2}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, max_to_scalar) -{ - Shape shape{2, 2}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = 
backend->create_tensor(element::f32, shape); - copy_data(a, vector{1, 2, 3, 4}); - auto result = backend->create_tensor(element::f32, Shape{}); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{4}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. - EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, max_matrix_columns) -{ - Shape shape_a{3, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{2}; - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{5, 6}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. - EXPECT_EQ((vector{1, 2, 3, 4, 5, 6}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, max_matrix_rows) -{ - Shape shape_a{3, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3}; - auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{2, 4, 6}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. 
- EXPECT_EQ((vector{1, 2, 3, 4, 5, 6}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, max_matrix_rows_zero) -{ - Shape shape_a{3, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3}; - auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3, 3, 3})); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{-std::numeric_limits::infinity(), - -std::numeric_limits::infinity(), - -std::numeric_limits::infinity()}), - read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. - EXPECT_EQ((vector{}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, max_matrix_cols_zero) -{ - // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={})). - Shape shape_a{0, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{2}; - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3, 3})); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{-std::numeric_limits::infinity(), - -std::numeric_limits::infinity()}), - read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. 
- EXPECT_EQ((vector{}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, max_vector_zero) -{ - Shape shape_a{0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{}; - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3})); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{-std::numeric_limits::infinity()}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. - EXPECT_EQ((vector{}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, max_matrix_to_scalar_zero_by_zero) -{ - Shape shape_a{0, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{}; - auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3})); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{-std::numeric_limits::infinity()}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. 
- EXPECT_EQ((vector{}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, max_3d_to_matrix_most_sig) -{ - Shape shape_a{3, 3, 3}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3, 3}; - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{19, 20, 21, 22, 23, 24, 25, 26, 27}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, max_3d_to_matrix_least_sig) -{ - Shape shape_a{3, 3, 3}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3, 3}; - auto f = make_shared(make_shared(A, AxisSet{2}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{3, 6, 9, 12, 15, 18, 21, 24, 27}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, max_3d_to_vector) -{ - Shape shape_a{3, 3, 3}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3}; - auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 
26, 27}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{25.0f, 26.0f, 27.0f}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, max_3d_to_scalar) -{ - Shape shape_a{3, 3, 3}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{}; - auto f = - make_shared(make_shared(A, AxisSet{0, 1, 2}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{14.0f}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, max_3d_eliminate_zero_dim) -{ - Shape shape_a{3, 0, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3, 2}; - auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - - // Overwrite the initial result vector to make sure we're not just coincidentally getting the right value. - copy_data(result, vector{2112, 2112, 2112, 2112, 2112, 2112}); - - float mi = -std::numeric_limits::infinity(); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{mi, mi, mi, mi, mi, mi}), read_vector(result)); -} - -// Trivial case with no reduced axes. 
-NGRAPH_TEST(${BACKEND_NAME}, min_trivial) -{ - Shape shape{2, 2}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{1, 2, 3, 4}); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(result)); -} - -// Failure has been reported at 5D for some reason -NGRAPH_TEST(${BACKEND_NAME}, min_trivial_5d) -{ - Shape shape{2, 2, 2, 2, 2}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, min_to_scalar) -{ - Shape shape{2, 2}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{1, 2, 3, 4}); - auto result = backend->create_tensor(element::f32, Shape{}); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't 
clobber the - // input tensors, so let's do this too. - EXPECT_EQ((vector{1, 2, 3, 4}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, min_matrix_columns) -{ - Shape shape_a{3, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{2}; - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 2}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. - EXPECT_EQ((vector{1, 2, 3, 4, 5, 6}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, min_matrix_rows) -{ - Shape shape_a{3, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3}; - auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 3, 5}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. 
- EXPECT_EQ((vector{1, 2, 3, 4, 5, 6}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, min_matrix_rows_zero) -{ - Shape shape_a{3, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3}; - auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3, 3, 3})); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{std::numeric_limits::infinity(), - std::numeric_limits::infinity(), - std::numeric_limits::infinity()}), - read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. - EXPECT_EQ((vector{}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, min_matrix_cols_zero) -{ - // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={})). - Shape shape_a{0, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{2}; - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3, 3})); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{std::numeric_limits::infinity(), - std::numeric_limits::infinity()}), - read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. 
- EXPECT_EQ((vector{}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, min_vector_zero) -{ - Shape shape_a{0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{}; - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3})); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{std::numeric_limits::infinity()}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. - EXPECT_EQ((vector{}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, min_matrix_to_scalar_zero_by_zero) -{ - Shape shape_a{0, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{}; - auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3})); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{std::numeric_limits::infinity()}), read_vector(result)); - - // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the - // input tensors, so let's do this too. 
- EXPECT_EQ((vector{}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, min_3d_to_matrix_most_sig) -{ - Shape shape_a{3, 3, 3}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3, 3}; - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 2, 3, 4, 5, 6, 7, 8, 9}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, min_3d_to_matrix_least_sig) -{ - Shape shape_a{3, 3, 3}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3, 3}; - auto f = make_shared(make_shared(A, AxisSet{2}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 4, 7, 10, 13, 16, 19, 22, 25}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, min_3d_to_vector) -{ - Shape shape_a{3, 3, 3}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3}; - auto f = make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}); - 
auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1, 2, 3}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, min_3d_to_scalar) -{ - Shape shape_a{3, 3, 3}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{}; - auto f = - make_shared(make_shared(A, AxisSet{0, 1, 2}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}); - auto result = backend->create_tensor(element::f32, shape_rt); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{1}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, min_3d_eliminate_zero_dim) -{ - Shape shape_a{3, 0, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3, 2}; - auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - - // Overwrite the initial result vector to make sure we're not just coincidentally getting the right value. 
- copy_data(result, vector{2112, 2112, 2112, 2112, 2112, 2112}); - - float inf = std::numeric_limits::infinity(); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ((vector{inf, inf, inf, inf, inf, inf}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, sigmoid_n1c1h2w2) -{ - auto input = make_shared(element::f32, Shape{1, 1, 2, 2}); - auto sigmoid_node = make_shared(input); - auto func = make_shared(sigmoid_node, op::ParameterVector{input}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - shared_ptr a = backend->create_tensor(element::f32, input->get_shape()); - shared_ptr result = backend->create_tensor(element::f32, input->get_shape()); - - vector dataA{1.0f, 4.0f, 1.0f, 4.0f}; - copy_data(a, dataA); - - backend->call_with_validate(func, {result}, {a}); - vector expected{0.73105858f, 0.98201379f, 0.73105858f, 0.98201379f}; - ASSERT_TRUE(read_vector(result) == expected); -} - -NGRAPH_TEST(${BACKEND_NAME}, sigmoid_n1c1h4) -{ - auto input = make_shared(element::f32, Shape{1, 1, 4}); - auto sigmoid_node = make_shared(input); - auto func = make_shared(sigmoid_node, op::ParameterVector{input}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - shared_ptr a = backend->create_tensor(element::f32, input->get_shape()); - shared_ptr result = backend->create_tensor(element::f32, input->get_shape()); - - vector dataA{1.0f, 4.0f, 1.0f, 4.0f}; - copy_data(a, dataA); - - backend->call_with_validate(func, {result}, {a}); - vector expected{0.73105858f, 0.98201379f, 0.73105858f, 0.98201379f}; - ASSERT_TRUE(read_vector(result) == expected); -} - -NGRAPH_TEST(${BACKEND_NAME}, sigmoid_bprop_n1c1h4) -{ - auto input = make_shared(element::f32, Shape{1, 1, 4}); - auto delta = make_shared(element::f32, Shape{1, 1, 4}); - auto sigmoid_node = make_shared(input, delta); - auto func = make_shared(sigmoid_node, op::ParameterVector{input, delta}); - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - shared_ptr a = 
backend->create_tensor(element::f32, input->get_shape()); - shared_ptr b = backend->create_tensor(element::f32, delta->get_shape()); - shared_ptr result = backend->create_tensor(element::f32, input->get_shape()); - - vector dataA{1.0f, 4.0f, 1.0f, 4.0f}; - vector dataB{1.0f, 1.0f, 1.0f, 1.0f}; - - copy_data(a, dataA); - copy_data(b, dataB); - backend->call_with_validate(func, {result}, {a, b}); - - vector expected{0.196612f, 0.0176627f, 0.196612f, 0.0176627f}; - EXPECT_TRUE(test::all_close(expected, read_vector(result))); -} - -NGRAPH_TEST(${BACKEND_NAME}, relu_2Dfprop) -{ - auto shape_a = Shape{2, 5}; - auto A = make_shared(element::f32, shape_a); - auto relu = make_shared(A); - auto shape_rt = Shape{2, 5}; - auto f = make_shared(relu, op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5}); - auto result = backend->create_tensor(element::f32, shape_rt); - vector expected{1, 8, 0, 17, 0, 1, 8, 0, 17, 0}; - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ(read_vector(result), expected); -} - -NGRAPH_TEST(${BACKEND_NAME}, relu_4Dfprop) -{ - auto shape_a = Shape{2, 2, 2, 2}; - auto A = make_shared(element::f32, shape_a); - auto relu = make_shared(A); - auto shape_rt = Shape{2, 2, 2, 2}; - auto f = make_shared(relu, op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5, 1}); - auto result = backend->create_tensor(element::f32, shape_rt); - vector expected{1, 8, 0, 17, 0, 1, 8, 0, 17, 0, 1, 8, 0, 17, 0, 1}; - - backend->call_with_validate(f, {result}, {a}); - EXPECT_EQ(read_vector(result), expected); -} - -NGRAPH_TEST(${BACKEND_NAME}, fuse_max_with_constant_zero_input_as_relu) -{ - auto shape_a = Shape{2, 5}; - auto A = 
op::Constant::create(element::f32, shape_a, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); - auto B = make_shared(element::f32, shape_a); - auto max = make_shared(A, B); - auto shape_rt = Shape{2, 5}; - auto f = make_shared(max, op::ParameterVector{B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto b = backend->create_tensor(element::f32, shape_a); - copy_data(b, vector{1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5}); - auto result = backend->create_tensor(element::f32, shape_rt); - vector expected{1, 8, 0, 17, 0, 1, 8, 0, 17, 0}; - - backend->call_with_validate(f, {result}, {b}); - EXPECT_EQ(read_vector(result), expected); -} - -NGRAPH_TEST(${BACKEND_NAME}, relu_2Dbackprop) -{ - auto shape_a = Shape{2, 5}; - auto A = make_shared(element::f32, shape_a); - auto delta_val = make_shared(element::f32, shape_a); - auto relu = make_shared(A, delta_val); - auto shape_rt = Shape{2, 5}; - auto f = make_shared(relu, op::ParameterVector{A, delta_val}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5}); - auto delta = backend->create_tensor(element::f32, shape_a); - copy_data(delta, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); - auto result = backend->create_tensor(element::f32, shape_rt); - vector expected{1, 2, 0, 4, 0, 6, 7, 0, 9, 0}; - - backend->call_with_validate(f, {result}, {a, delta}); - EXPECT_EQ(read_vector(result), expected); -} - -NGRAPH_TEST(${BACKEND_NAME}, relu_4Dbackprop) -{ - auto shape_a = Shape{2, 2, 2, 2}; - auto A = make_shared(element::f32, shape_a); - auto delta_val = make_shared(element::f32, shape_a); - auto relu = make_shared(A, delta_val); - auto shape_rt = Shape{2, 2, 2, 2}; - auto f = make_shared(relu, op::ParameterVector{A, delta_val}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{1, 8, -8, 17, -0.5, 1, 8, -8, 
17, -0.5, 1, 8, -8, 17, -0.5, 1}); - auto delta = backend->create_tensor(element::f32, shape_a); - copy_data(delta, vector{1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5, 1, 8, -8, 17, -0.5, 1}); - auto result = backend->create_tensor(element::f32, shape_rt); - vector expected{1, 8, 0, 17, 0, 1, 8, 0, 17, 0, 1, 8, 0, 17, 0, 1}; - - backend->call_with_validate(f, {result}, {a, delta}); - EXPECT_EQ(read_vector(result), expected); -} - -NGRAPH_TEST(${BACKEND_NAME}, softmax_all) -{ - Shape shape{2, 3}; - auto A = make_shared(element::f32, shape); - auto f = - make_shared(make_shared(A, AxisSet{0, 1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{-3, -2, -1, 0, 1, 2}); - auto result = backend->create_tensor(element::f32, shape); - - auto d = expf(-3) + expf(-2) + expf(-1) + expf(0) + expf(1) + expf(2); - - backend->call_with_validate(f, {result}, {a}); - vector expected{ - expf(-3) / d, expf(-2) / d, expf(-1) / d, expf(0) / d, expf(1) / d, expf(2) / d}; - EXPECT_TRUE(test::all_close_f(expected, read_vector(result))); - - // empty AxisSet is the same as "full" AxisSet - f = make_shared(make_shared(A, AxisSet{}), op::ParameterVector{A}); - backend = runtime::Backend::create("${BACKEND_NAME}"); - - backend->call_with_validate(f, {result}, {a}); - EXPECT_TRUE(test::all_close_f(expected, read_vector(result))); -} - -NGRAPH_TEST(${BACKEND_NAME}, softmax_axis_3d) -{ - Shape shape{2, 2, 3}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{-10, -20, -30, -40, -50, -60, -1, -2, -3, -4, -5, -6}); - auto result = backend->create_tensor(element::f32, shape); - - auto d0 = expf(-10) + expf(-1); - auto d1 = expf(-20) + expf(-2); - auto d2 = expf(-30) + 
expf(-3); - auto d3 = expf(-40) + expf(-4); - auto d4 = expf(-50) + expf(-5); - auto d5 = expf(-60) + expf(-6); - - backend->call_with_validate(f, {result}, {a}); - vector expected{expf(-10) / d0, - expf(-20) / d1, - expf(-30) / d2, - expf(-40) / d3, - expf(-50) / d4, - expf(-60) / d5, - expf(-1) / d0, - expf(-2) / d1, - expf(-3) / d2, - expf(-4) / d3, - expf(-5) / d4, - expf(-6) / d5}; - - EXPECT_TRUE(test::all_close(expected, read_vector(result))); -} - -NGRAPH_TEST(${BACKEND_NAME}, softmax_axis_3d_double) -{ - Shape shape{2, 2, 3}; - auto A = make_shared(element::f64, shape); - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::f64, shape); - copy_data(a, vector{-10, -20, -30, -40, -50, -60, -1, -2, -3, -4, -5, -6}); - auto result = backend->create_tensor(element::f64, shape); - - auto d0 = expf(-10) + expf(-1); - auto d1 = expf(-20) + expf(-2); - auto d2 = expf(-30) + expf(-3); - auto d3 = expf(-40) + expf(-4); - auto d4 = expf(-50) + expf(-5); - auto d5 = expf(-60) + expf(-6); - - backend->call_with_validate(f, {result}, {a}); - vector expected{expf(-10) / d0, - expf(-20) / d1, - expf(-30) / d2, - expf(-40) / d3, - expf(-50) / d4, - expf(-60) / d5, - expf(-1) / d0, - expf(-2) / d1, - expf(-3) / d2, - expf(-4) / d3, - expf(-5) / d4, - expf(-6) / d5}; - - EXPECT_TRUE(test::all_close(expected, read_vector(result))); -} - -NGRAPH_TEST(${BACKEND_NAME}, softmax_axis) -{ - Shape shape{2, 3}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{1}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{-10, -20, -30, -40, -50, -60}); - auto result = backend->create_tensor(element::f32, shape); - - auto d0 = expf(-10) + expf(-20) + expf(-30); - auto d1 = expf(-40) + expf(-50) + expf(-60); 
- - backend->call_with_validate(f, {result}, {a}); - vector expected{expf(-10) / d0, - expf(-20) / d0, - expf(-30) / d0, - expf(-40) / d1, - expf(-50) / d1, - expf(-60) / d1}; - EXPECT_TRUE(test::all_close_f(expected, read_vector(result))); -} - -NGRAPH_TEST(${BACKEND_NAME}, softmax_axis_2) -{ - Shape shape{2, 3}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{-10, -20, -30, -40, -50, -60}); - auto result = backend->create_tensor(element::f32, shape); - - auto d0 = expf(-10) + expf(-40); - auto d1 = expf(-20) + expf(-50); - auto d2 = expf(-30) + expf(-60); - - backend->call_with_validate(f, {result}, {a}); - vector expected{expf(-10) / d0, - expf(-20) / d1, - expf(-30) / d2, - expf(-40) / d0, - expf(-50) / d1, - expf(-60) / d2}; - EXPECT_TRUE(test::all_close(expected, read_vector(result))); -} - -NGRAPH_TEST(${BACKEND_NAME}, softmax_axis_3d_trivial) -{ - Shape shape{1, 2, 3}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{-10, -20, -30, -40, -50, -60}); - auto result = backend->create_tensor(element::f32, shape); - - backend->call_with_validate(f, {result}, {a}); - vector expected{1, 1, 1, 1, 1, 1}; - EXPECT_TRUE(test::all_close(expected, read_vector(result))); -} - -NGRAPH_TEST(${BACKEND_NAME}, softmax_underflow) -{ - Shape shape{2, 3}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, AxisSet{0}), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto low = std::numeric_limits::lowest(); - - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, 
vector{low, 1, 2, 3, 4, 5}); - auto result = backend->create_tensor(element::f32, shape); - - auto d0 = expf(low) + expf(3); - auto d1 = expf(1) + expf(4); - auto d2 = expf(2) + expf(5); - - backend->call_with_validate(f, {result}, {a}); - vector expected{ - expf(low) / d0, expf(1) / d1, expf(2) / d2, expf(3) / d0, expf(4) / d1, expf(5) / d2}; - EXPECT_TRUE(test::all_close(expected, read_vector(result))); -} - -NGRAPH_TEST(${BACKEND_NAME}, multiple_backends) -{ - Shape shape{2, 2}; - auto A1 = make_shared(element::f32, shape); - auto B1 = make_shared(element::f32, shape); - auto f = make_shared(A1 + B1, op::ParameterVector{A1, B1}); - - auto A2 = make_shared(element::f32, shape); - auto B2 = make_shared(element::f32, shape); - auto g = make_shared(A2 * B2, op::ParameterVector{A2, B2}); - - auto backend1 = runtime::Backend::create("${BACKEND_NAME}"); - - auto backend2 = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - shared_ptr a1 = backend1->create_tensor(element::f32, shape); - shared_ptr b1 = backend1->create_tensor(element::f32, shape); - shared_ptr result1 = backend1->create_tensor(element::f32, shape); - - shared_ptr a2 = backend2->create_tensor(element::f32, shape); - shared_ptr b2 = backend2->create_tensor(element::f32, shape); - shared_ptr result2 = backend2->create_tensor(element::f32, shape); - - copy_data(a1, test::NDArray({{1, 2}, {3, 4}}).get_vector()); - copy_data(b1, test::NDArray({{5, 6}, {7, 8}}).get_vector()); - - copy_data(a2, test::NDArray({{1, 2}, {3, 4}}).get_vector()); - copy_data(b2, test::NDArray({{5, 6}, {7, 8}}).get_vector()); - - backend1->call_with_validate(f, {result1}, {a1, b1}); - EXPECT_EQ(read_vector(result1), - (test::NDArray({{6, 8}, {10, 12}})).get_vector()); - - backend2->call_with_validate(g, {result2}, {a2, b2}); - EXPECT_EQ(read_vector(result2), - (test::NDArray({{5, 12}, {21, 32}})).get_vector()); -} - -NGRAPH_TEST(${BACKEND_NAME}, tensorview_custom_mem) -{ - auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); - - Shape shape{2, 2}; - - auto make_external = [&]() { - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); - - return f; - }; - - auto f = make_external(); - - vector av{2, 4, 8, 16}; - vector bv{1, 2, 4, 8}; - // use custom mem with tensorview, no need to copy data - auto a = backend->create_tensor(element::f32, shape, av.data()); - auto b = backend->create_tensor(element::f32, shape, bv.data()); - - // use custom mem with result tensorview - vector rv{0, 0, 0, 0}; - auto result = backend->create_tensor(element::f32, shape, rv.data()); - - // result should be in memory without needing explict read - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((vector{2, 2, 2, 2}), rv); -} - -NGRAPH_TEST(${BACKEND_NAME}, validate_call_input_count) -{ - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - Shape shape{2, 2}; - - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); - - auto a = backend->create_tensor(element::f32, shape); - auto b = backend->create_tensor(element::f32, shape); - auto c = backend->create_tensor(element::f32, shape); - - EXPECT_ANY_THROW(backend->call_with_validate(f, {c}, {a})); -} - -NGRAPH_TEST(${BACKEND_NAME}, validate_call_input_type) -{ - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - Shape shape{2, 2}; - - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); - - auto a = backend->create_tensor(element::i32, shape); - auto b = backend->create_tensor(element::f32, shape); - auto c = backend->create_tensor(element::f32, shape); - - EXPECT_ANY_THROW(backend->call_with_validate(f, {c}, {a, b})); -} - -NGRAPH_TEST(${BACKEND_NAME}, 
validate_call_input_shape) -{ - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - Shape shape{2, 2}; - - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); - - auto a = backend->create_tensor(element::f32, {2, 3}); - auto b = backend->create_tensor(element::f32, shape); - auto c = backend->create_tensor(element::f32, shape); - - EXPECT_ANY_THROW(backend->call_with_validate(f, {c}, {a, b})); -} - -NGRAPH_TEST(${BACKEND_NAME}, validate_call_output_count) -{ - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - Shape shape{2, 2}; - - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); - - auto a = backend->create_tensor(element::f32, shape); - auto b = backend->create_tensor(element::f32, shape); - auto c = backend->create_tensor(element::f32, shape); - auto d = backend->create_tensor(element::f32, shape); - - EXPECT_ANY_THROW(backend->call_with_validate(f, {c, d}, {a, b})); -} - -NGRAPH_TEST(${BACKEND_NAME}, validate_call_output_type) -{ - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - Shape shape{2, 2}; - - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); - - auto a = backend->create_tensor(element::i32, shape); - auto b = backend->create_tensor(element::f32, shape); - auto c = backend->create_tensor(element::f32, shape); - - EXPECT_ANY_THROW(backend->call_with_validate(f, {a}, {b, c})); -} - -NGRAPH_TEST(${BACKEND_NAME}, validate_call_output_shape) -{ - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - Shape shape{2, 2}; - - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); - - auto a = 
backend->create_tensor(element::f32, {2, 3}); - auto b = backend->create_tensor(element::f32, shape); - auto c = backend->create_tensor(element::f32, shape); - - EXPECT_ANY_THROW(backend->call_with_validate(f, {a}, {c, b})); -} - -NGRAPH_TEST(${BACKEND_NAME}, logical_and) -{ - Shape shape{2, 2, 2}; - auto A = make_shared(element::boolean, shape); - auto B = make_shared(element::boolean, shape); - auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::boolean, shape); - copy_data(a, vector{1, 0, 1, 1, 1, 0, 1, 0}); - auto b = backend->create_tensor(element::boolean, shape); - copy_data(b, vector{0, 0, 1, 0, 0, 1, 1, 0}); - auto result = backend->create_tensor(element::boolean, shape); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((vector{0, 0, 1, 0, 0, 0, 1, 0}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, logical_or) -{ - Shape shape{2, 2, 2}; - auto A = make_shared(element::boolean, shape); - auto B = make_shared(element::boolean, shape); - auto f = make_shared(make_shared(A, B), op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::boolean, shape); - copy_data(a, vector{1, 0, 1, 1, 1, 0, 1, 0}); - auto b = backend->create_tensor(element::boolean, shape); - copy_data(b, vector{0, 0, 1, 0, 0, 1, 1, 0}); - auto result = backend->create_tensor(element::boolean, shape); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ((vector{1, 0, 1, 1, 1, 1, 1, 0}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_b1c2h2w2) -{ - auto input_shape = Shape{1, 2, 2, 2}; - auto input = make_shared(element::f32, input_shape); - auto mean_shape = Shape{2}; - auto var_shape = Shape{2}; - auto gamma_shape = Shape{2}; - auto gamma = 
make_shared(element::f32, gamma_shape); - auto beta_shape = Shape{2}; - auto beta = make_shared(element::f32, beta_shape); - double eps = 0.001; - auto shape_r = Shape{1, 2, 2, 2}; - auto bn = make_shared(input, gamma, beta, eps); - - auto output_rt = std::make_shared(bn, 0); - auto mean_rt = std::make_shared(bn, 1); - auto variance_rt = std::make_shared(bn, 2); - - auto f = make_shared(NodeVector{output_rt, mean_rt, variance_rt}, - op::ParameterVector{input, gamma, beta}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto _input = backend->create_tensor(element::f32, Shape{1, 2, 2, 2}); - - copy_data(_input, - vector{0.54881352f, - 0.71518934f, - 0.60276335f, - 0.54488319f, - 0.42365479f, - 0.64589411f, - 0.4375872f, - 0.89177299f}); - auto _gamma = backend->create_tensor(element::f32, gamma_shape); - copy_data(_gamma, vector{1.0f, 1.0f}); - auto _beta = backend->create_tensor(element::f32, beta_shape); - copy_data(_beta, vector{0.0f, 0.0f}); - auto bn_output = backend->create_tensor(element::f32, shape_r); - auto result_mean = backend->create_tensor(element::f32, mean_shape); - auto result_variance = backend->create_tensor(element::f32, var_shape); - - vector expected_result{-0.71498716f, - 1.48388731f, - -0.00196938f, - -0.76693159f, - -0.91316032f, - 0.23943391f, - -0.84090298f, - 1.51462936f}; - vector expected_mean{0.602912f, 0.599727f}; - vector expected_variance{0.00472505f, 0.0361782f}; - - backend->call_with_validate( - f, {bn_output, result_mean, result_variance}, {_input, _gamma, _beta}); - - EXPECT_TRUE(test::all_close(expected_result, read_vector(bn_output), 1e-5f, 1e-6f)); - EXPECT_TRUE(test::all_close(expected_mean, read_vector(result_mean), 1e-5f, 1e-6f)); - EXPECT_TRUE( - test::all_close(expected_variance, read_vector(result_variance), 1e-5f, 1e-6f)); -} - -NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_b2c2h2w1) -{ - auto input_shape = Shape{2, 2, 2, 1}; - auto input = 
make_shared(element::f32, input_shape); - auto mean_shape = Shape{2}; - auto var_shape = Shape{2}; - auto gamma_shape = Shape{2}; - auto gamma = make_shared(element::f32, gamma_shape); - auto beta_shape = Shape{2}; - auto beta = make_shared(element::f32, beta_shape); - double eps = 0.001; - auto shape_r = Shape{2, 2, 2, 1}; - auto bn = make_shared(input, gamma, beta, eps); - - auto output_rt = std::make_shared(bn, 0); - auto mean_rt = std::make_shared(bn, 1); - auto variance_rt = std::make_shared(bn, 2); - - auto f = make_shared(NodeVector{output_rt, mean_rt, variance_rt}, - op::ParameterVector{input, gamma, beta}); - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - // Create some tensors for input/output - auto _input = backend->create_tensor(element::f32, input_shape); - copy_data(_input, - vector{0.54881352f, - 0.71518934f, - 0.60276335f, - 0.54488319f, - 0.42365479f, - 0.64589411f, - 0.4375872f, - 0.89177299f}); - - auto _gamma = backend->create_tensor(element::f32, gamma_shape); - copy_data(_gamma, vector{1.0f, 1.0f}); - auto _beta = backend->create_tensor(element::f32, beta_shape); - copy_data(_beta, vector{0.0f, 0.0f}); - auto bn_output = backend->create_tensor(element::f32, shape_r); - auto result_mean = backend->create_tensor(element::f32, mean_shape); - auto result_variance = backend->create_tensor(element::f32, var_shape); - - vector expected_result{ - -0.30327f, 1.1561f, -0.0963782f, -0.434702f, -1.4011f, 0.548275f, -1.06187f, 1.59295f}; - vector expected_mean{0.583388f, 0.619252f}; - vector expected_variance{0.0119972f, 0.0282681f}; - backend->call_with_validate( - f, {bn_output, result_mean, result_variance}, {_input, _gamma, _beta}); - - EXPECT_TRUE(test::all_close(expected_result, read_vector(bn_output))); - EXPECT_TRUE(test::all_close(expected_mean, read_vector(result_mean))); - EXPECT_TRUE( - test::all_close(expected_variance, read_vector(result_variance), 1e-5f, 1e-6f)); -} - -NGRAPH_TEST(${BACKEND_NAME}, batchnorm_bprop_n4c3h2w2) -{ 
- auto input_shape = Shape{4, 3, 2, 2}; - auto shape_mean = Shape{3}; - auto input = make_shared(element::f32, input_shape); - auto mean_shape = Shape{3}; - auto mean = make_shared(element::f32, mean_shape); - auto var_shape = Shape{3}; - auto var = make_shared(element::f32, var_shape); - auto gamma_shape = Shape{3}; - auto gamma = make_shared(element::f32, gamma_shape); - auto beta_shape = Shape{3}; - auto beta = make_shared(element::f32, beta_shape); - double eps = 0.001; - auto shape_r = Shape{4, 3, 2, 2}; - auto bn = make_shared(input, gamma, beta, eps); - auto bn_dx = make_shared(bn, 0); - auto bn_dgamma = make_shared(bn, 1); - auto bn_dbeta = make_shared(bn, 2); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto _input = backend->create_tensor(element::f32, input_shape); - vector dataInput{ - 10.76331902f, 11.51178265f, 10.31018162f, 12.2993021f, 14.17626667f, 14.63498497f, - 13.63494492f, 13.84248161f, 11.34602547f, 13.22014618f, 10.46686649f, 10.39842987f, - 12.94806862f, 11.71670246f, 14.94438076f, 13.13236618f, 13.40889645f, 12.76128387f, - 11.34430027f, 11.86629677f, 11.11464024f, 10.93221283f, 11.95324039f, 10.96581173f, - 13.05455494f, 14.41404247f, 13.11169434f, 11.26559448f, 10.89965153f, 14.08202171f, - 11.12685776f, 12.58428574f, 12.59247875f, 13.00187492f, 12.66310215f, 10.06655025f, - 12.62048626f, 14.47942352f, 13.84950638f, 10.61425877f, 11.47936344f, 13.06011772f, - 13.63069057f, 12.31748772f, 13.84555244f, 10.95815468f, 12.78933334f, 12.75389099f}; - copy_data(_input, dataInput); - auto _mean = backend->create_tensor(element::f32, mean_shape); - copy_data(_mean, vector{12.56472874f, 12.80312157f, 11.81676865f}); - auto _var = backend->create_tensor(element::f32, var_shape); - copy_data(_var, vector{1.94557643f, 1.32772446f, 1.28163588f}); - - auto _gamma = backend->create_tensor(element::f32, gamma_shape); - copy_data(_gamma, vector{2.0f, 2.0f, 2.0f}); - auto _beta = backend->create_tensor(element::f32, beta_shape); - 
copy_data(_beta, vector{1.0f, 1.0f, 1.0f}); - auto result = backend->create_tensor(element::f32, shape_r); - - shared_ptr _delta = backend->create_tensor(element::f32, shape_r); - vector deltaData(shape_size(shape_r), 20.0f); - copy_data(_delta, deltaData); - - auto f = make_shared(NodeVector{bn_dx, bn_dgamma, bn_dbeta}, - op::ParameterVector{mean, var, input, gamma, beta}); - - auto C = std::make_shared(element::f32, shape_r); - - auto zero = ngraph::make_zero(bn_dgamma->get_element_type(), bn_dgamma->get_shape()); - ngraph::autodiff::Adjoints adjoints(NodeVector{bn_dx, bn_dgamma, bn_dbeta}, - NodeVector{C, zero, zero}); - - auto dinput = adjoints.backprop_node(input); - auto dgamma = adjoints.backprop_node(gamma); - auto dbeta = adjoints.backprop_node(beta); - - auto df = make_shared(NodeVector{dinput, dgamma, dbeta}, - op::ParameterVector{mean, var, input, gamma, beta, C}); - - // roundtrip serialization - string js = serialize(df, 4); - istringstream in(js); - df = deserialize(in); - - shared_ptr _dinput = backend->create_tensor(element::f32, shape_r); - shared_ptr _dgamma = backend->create_tensor(element::f32, gamma_shape); - shared_ptr _dbeta = backend->create_tensor(element::f32, beta_shape); - - backend->call_with_validate( - df, {_dinput, _dgamma, _dbeta}, {_mean, _var, _input, _gamma, _beta, _delta}); - - vector expected_input{ - 8.17051607e-06f, 4.77576657e-06f, 1.02257760e-05f, 1.20387525e-06f, -1.73868522e-06f, - 3.84632768e-06f, -1.07932050e-05f, -2.57458956e-06f, -2.22166714e-06f, -8.38779043e-06f, - -2.48082982e-06f, 5.89238360e-06f, -2.52895109e-07f, -8.68433445e-06f, -5.82726737e-06f, - 8.84659658e-06f, 3.03944108e-05f, 4.05480879e-05f, 1.84123158e-05f, 2.30061178e-05f, - 1.34087590e-05f, -9.26072571e-07f, -3.22908454e-05f, -2.07365116e-05f, -4.21330941e-05f, - 2.83083100e-05f, -3.71039101e-05f, -4.84390640e-06f, -2.93012376e-05f, 5.68858087e-06f, - 1.83181458e-05f, -1.07494506e-05f, -2.32429103e-06f, 6.92914809e-06f, -6.66512321e-06f, - 
-7.00302840e-06f, -3.46675184e-06f, -4.36748381e-06f, 6.73822226e-07f, -4.20158993e-06f, - 3.83005061e-06f, 5.85143729e-06f, 4.17875243e-06f, -8.64167783e-06f, 1.00170803e-05f, - -4.23939666e-06f, 4.80201680e-06f, 4.62702078e-06f}; - - ASSERT_TRUE(ngraph::test::all_close(read_vector(_dinput), expected_input, 1e-3f, 1e-4f)); - vector expected_dgamma{7.06315041e-05f, -2.35289335e-04f, -5.06639481e-05f}; - ASSERT_TRUE( - ngraph::test::all_close(read_vector(_dgamma), expected_dgamma, 1e-2f, 1e-3f)); - vector expected_dbeta{320.f, 320.f, 320.f}; - ASSERT_TRUE(ngraph::test::all_close(read_vector(_dbeta), expected_dbeta, 1e-4f, 1e-8f)); -} - -NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_inference_b2c2h2w1) -{ - auto input_shape = Shape{2, 2, 2, 1}; - auto input = make_shared(element::f32, input_shape); - auto mean_shape = Shape{2}; - auto mean = make_shared(element::f32, mean_shape); - auto var_shape = Shape{2}; - auto var = make_shared(element::f32, var_shape); - auto gamma_shape = Shape{2}; - auto gamma = make_shared(element::f32, gamma_shape); - auto beta_shape = Shape{2}; - auto beta = make_shared(element::f32, beta_shape); - double eps = 0.001; - auto shape_r = Shape{2, 2, 2, 1}; - auto bn = make_shared(input, gamma, beta, mean, var, eps); - - auto f = make_shared(bn, op::ParameterVector{input, gamma, beta, mean, var}); - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - // Create some tensors for input/output - auto _input = backend->create_tensor(element::f32, input_shape); - copy_data(_input, - vector{0.54881352f, - 0.71518934f, - 0.60276335f, - 0.54488319f, - 0.42365479f, - 0.64589411f, - 0.4375872f, - 0.89177299f}); - - auto _gamma = backend->create_tensor(element::f32, gamma_shape); - copy_data(_gamma, vector{1.0f, 1.0f}); - auto _beta = backend->create_tensor(element::f32, beta_shape); - copy_data(_beta, vector{0.0f, 0.0f}); - auto _mean = backend->create_tensor(element::f32, mean_shape); - copy_data(_mean, vector{0.583388f, 0.619252f}); - auto 
_var = backend->create_tensor(element::f32, var_shape); - copy_data(_var, vector{0.0119972f, 0.0282681f}); - auto bn_output = backend->create_tensor(element::f32, shape_r); - - vector expected_result{ - -0.30327f, 1.1561f, -0.0963782f, -0.434702f, -1.4011f, 0.548275f, -1.06187f, 1.59295f}; - backend->call_with_validate(f, {bn_output}, {_input, _gamma, _beta, _mean, _var}); - - ASSERT_TRUE( - ngraph::test::all_close(expected_result, read_vector(bn_output), 1e-3f, 1e-4f)); -} - -#if 0 -NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_globalstats_b2c2w2h1) -{ - auto input_shape = Shape{2, 2, 2, 1}; - auto input = make_shared(element::f32, input_shape); - auto mean_shape = Shape{2}; - auto mean = make_shared(element::f32, mean_shape); - auto var_shape = Shape{2}; - auto var = make_shared(element::f32, var_shape); - auto gamma_shape = Shape{2}; - auto gamma = make_shared(element::f32, gamma_shape); - auto beta_shape = Shape{2}; - auto beta = make_shared(element::f32, beta_shape); - double eps = 0.001; - auto shape_r = Shape{2, 2, 2, 1}; - auto bn = make_shared(input, gamma, beta, mean, var, eps); - - auto f = make_shared(bn, op::ParameterVector{gamma, beta, input, mean, var}); - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - // Create some tensors for input/output - auto _input = backend->create_tensor(element::f32, input_shape); - copy_data(_input, - vector{0.54881352f, - 0.71518934f, - 0.60276335f, - 0.54488319f, - 0.42365479f, - 0.64589411f, - 0.4375872f, - 0.89177299f}); - - auto _gamma = backend->create_tensor(element::f32, gamma_shape); - copy_data(_gamma, vector{1.0f, 1.0f}); - auto _beta = backend->create_tensor(element::f32, beta_shape); - copy_data(_beta, vector{0.0f, 0.0f}); - auto _mean = backend->create_tensor(element::f32, mean_shape); - copy_data(_mean, vector{0.583388f, 0.619252f}); - auto _var = backend->create_tensor(element::f32, var_shape); - copy_data(_var, vector{0.0119972f, 0.0282681f}); - auto bn_output = 
backend->create_tensor(element::f32, shape_r); - - vector expected_result{ - -0.30327f, 1.1561f, -0.0963782f, -0.434702f, -1.4011f, 0.548275f, -1.06187f, 1.59295f}; - backend->call_with_validate(f, {bn_output}, {_gamma, _beta, _input, _mean, _var}); - - ASSERT_TRUE( - ngraph::test::all_close(expected_result, read_vector(bn_output), 1e-3f, 1e-4f)); -} -#endif - -NGRAPH_TEST(${BACKEND_NAME}, reverse_sequence_n2c3h4w2) -{ - Shape shape{2, 3, 4, 2}; - Shape seq_len_shape{4}; - auto A = make_shared(element::i32, shape); - auto B = make_shared(element::i32, seq_len_shape); - - size_t batch_axis = 2; - size_t sequence_axis = 1; - auto rs = std::make_shared(A, B, batch_axis, sequence_axis); - - auto f = make_shared(rs, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - shared_ptr a = backend->create_tensor(element::i32, shape); - shared_ptr b = backend->create_tensor(element::i32, seq_len_shape); - - shared_ptr result = backend->create_tensor(element::i32, shape); - - std::vector input{ - 0, 0, 3, 0, 6, 0, 9, 0, 1, 0, 4, 0, 7, 0, 10, 0, 2, 0, 5, 0, 8, 0, 11, 0, - 12, 0, 15, 0, 18, 0, 21, 0, 13, 0, 16, 0, 19, 0, 22, 0, 14, 0, 17, 0, 20, 0, 23, 0, - }; - - std::vector seq_lenghts{1, 2, 1, 2}; - copy_data(b, seq_lenghts); - - std::vector expected{ - 0, 0, 4, 0, 6, 0, 10, 0, 1, 0, 3, 0, 7, 0, 9, 0, 2, 0, 5, 0, 8, 0, 11, 0, - - 12, 0, 16, 0, 18, 0, 22, 0, 13, 0, 15, 0, 19, 0, 21, 0, 14, 0, 17, 0, 20, 0, 23, 0}; - - copy_data(a, input); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ(read_vector(result), expected); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_sequence_n4c3h2w2) -{ - Shape shape{4, 3, 2, 2}; - auto A = make_shared(element::i32, shape); - Shape seq_len_shape{4}; - auto B = make_shared(element::i32, seq_len_shape); - - size_t batch_axis = 0; - size_t sequence_axis = 1; - - auto rs = std::make_shared(A, B, batch_axis, sequence_axis); - - auto f = make_shared(rs, 
op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - shared_ptr a = backend->create_tensor(element::i32, shape); - shared_ptr b = backend->create_tensor(element::i32, seq_len_shape); - - shared_ptr result = backend->create_tensor(element::i32, shape); - - std::vector seq_lenghts{1, 2, 3, 3}; - copy_data(b, seq_lenghts); - - std::vector input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47}; - - std::vector expected{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, - 12, 13, 14, 15, 20, 21, 22, 23, 32, 33, 34, 35, 28, 29, 30, 31, - 24, 25, 26, 27, 44, 45, 46, 47, 40, 41, 42, 43, 36, 37, 38, 39}; - - copy_data(a, input); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ(read_vector(result), expected); -} - -NGRAPH_TEST(${BACKEND_NAME}, reverse_sequence_n4d2c3h2w2) -{ - Shape shape{4, 2, 3, 2, 2}; - auto A = make_shared(element::i32, shape); - Shape seq_len_shape{4}; - auto B = make_shared(element::i32, seq_len_shape); - - size_t batch_axis = 0; - size_t sequence_axis = 2; - - auto rs = std::make_shared(A, B, batch_axis, sequence_axis); - - auto f = make_shared(rs, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - shared_ptr a = backend->create_tensor(element::i32, shape); - shared_ptr b = backend->create_tensor(element::i32, seq_len_shape); - - shared_ptr result = backend->create_tensor(element::i32, shape); - - std::vector input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; - - std::vector expected{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 28, 29, 30, 31, 24, 25, 26, 27, - 32, 33, 34, 35, 40, 41, 42, 43, 36, 37, 38, 39, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 76, 77, 78, 79, 72, 73, 74, 75, - 80, 81, 82, 83, 88, 89, 90, 91, 84, 85, 86, 87, 92, 93, 94, 95}; - - copy_data(a, input); - - std::vector seq_lenghts{1, 2, 1, 2}; - copy_data(b, seq_lenghts); - - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ(read_vector(result), expected); -} - -NGRAPH_TEST(${BACKEND_NAME}, generate_mask) -{ - Shape scalar{}; - Shape result_shape{1, 128}; - const unsigned int seed = 777; - auto training = op::Constant::create(element::f32, Shape{}, {1}); - auto gen_mask = make_shared(training, result_shape, element::f32, seed, 0.5); - auto gen_mask2 = make_shared(training, result_shape, element::f32, seed, 0.5); - auto f = make_shared(NodeVector{gen_mask, gen_mask2}, op::ParameterVector{}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto is_not_zero_or_one = [](float num) { return num != 0.f && num != 1.f; }; - - auto result_tv1 = backend->create_tensor(result_shape); - auto result_tv2 = backend->create_tensor(result_shape); - backend->call_with_validate(f, {result_tv1, result_tv2}, {}); - auto result1 = read_vector(result_tv1); - auto result2 = read_vector(result_tv2); - ASSERT_EQ(result1, result2); - ASSERT_FALSE(std::any_of(result1.begin(), result1.end(), is_not_zero_or_one)); - backend->call_with_validate(f, {result_tv1, result_tv2}, {}); - auto result1_2 = read_vector(result_tv1); - auto result2_2 = read_vector(result_tv2); - ASSERT_NE(result1, result1_2); - ASSERT_FALSE(std::any_of(result1_2.begin(), result1_2.end(), is_not_zero_or_one)); - ASSERT_NE(result2, result2_2); - ASSERT_FALSE(std::any_of(result2_2.begin(), 
result2_2.end(), is_not_zero_or_one)); -} - -NGRAPH_TEST(${BACKEND_NAME}, quantize) -{ - Shape input_shape{4, 3}; - Shape scale_offset_shape; - AxisSet quantization_axes; - - auto input_type = element::f32; - auto output_type = element::u8; - - typedef float input_c_type; - typedef uint8_t output_c_type; - - op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY; - - auto X = make_shared(input_type, input_shape); - auto scale = op::Constant::create(input_type, scale_offset_shape, {2}); - auto offset = op::Constant::create(output_type, scale_offset_shape, {1}); - auto quantize = - make_shared(X, scale, offset, output_type, quantization_axes, round_mode); - auto f = make_shared(quantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = backend->create_tensor(input_type, input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); - // divide by scale 2 2 2 2 2 2 2 2 2 2 2 2 - // equals (rounded) 0 1 1 2 2 3 3 4 4 5 5 6 - // plus offset 1 1 1 1 1 1 1 1 1 1 1 1 - // equals 1 2 2 3 3 4 4 5 5 6 6 7 - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ((vector{1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, dequantize) -{ - Shape input_shape{4, 3}; - Shape scale_offset_shape; - AxisSet quantization_axes; - - auto input_type = element::u8; - auto output_type = element::f32; - - typedef uint8_t input_c_type; - typedef float output_c_type; - - auto X = make_shared(input_type, input_shape); - auto scale = op::Constant::create(output_type, scale_offset_shape, {2}); - auto offset = op::Constant::create(input_type, scale_offset_shape, {1}); - auto dequantize = make_shared(X, scale, offset, output_type, quantization_axes); - auto f = make_shared(dequantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = 
backend->create_tensor(input_type, input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7}); - // minus offset 1 1 1 1 1 1 1 1 1 1 1 1 - // eqauls 0 1 1 2 2 3 3 4 4 5 5 6 - // multiplied by scale 2 2 2 2 2 2 2 2 2 2 2 2 - // equals 0 2 2 4 4 6 6 8 8 10 10 12 - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ((vector{0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, dequantize_zero_offset) -{ - Shape input_shape{4, 3}; - Shape scale_offset_shape; - AxisSet quantization_axes; - - auto input_type = element::u8; - auto output_type = element::f32; - - typedef uint8_t input_c_type; - typedef float output_c_type; - - auto X = make_shared(input_type, input_shape); - auto scale = op::Constant::create(output_type, scale_offset_shape, {2}); - auto offset = op::Constant::create(input_type, scale_offset_shape, {0}); - auto dequantize = make_shared(X, scale, offset, output_type, quantization_axes); - auto f = make_shared(dequantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = backend->create_tensor(input_type, input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7}); - // minus offset 0 0 0 0 0 0 0 0 0 0 0 0 - // multiplied by scale 2 2 2 2 2 2 2 2 2 2 2 2 - // equals 2 4 4 6 6 8 8 10 10 12 12 14 - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ((vector{2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, quantize_axes) -{ - Shape input_shape{4, 3}; - Shape scale_offset_shape{4}; - AxisSet quantization_axes{0}; - - auto input_type = element::f32; - auto output_type = element::u8; - - typedef float input_c_type; - typedef uint8_t output_c_type; - - op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY; - - auto X = 
make_shared(input_type, input_shape); - auto scale = op::Constant::create(input_type, scale_offset_shape, {2, 3, 4, 5}); - auto offset = op::Constant::create(output_type, scale_offset_shape, {10, 20, 30, 40}); - auto quantize = - make_shared(X, scale, offset, output_type, quantization_axes, round_mode); - auto f = make_shared(quantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = backend->create_tensor(input_type, input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); - // divided by scale 2 2 2 3 3 3 4 4 4 5 5 5 - // equals (rounded) 0 1 1 1 1 2 2 2 2 2 2 2 - // plus offset 10 10 10 20 20 20 30 30 30 40 40 40 - // equals 10 11 11 21 21 22 32 32 32 42 42 42 - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ((vector{10, 11, 11, 21, 21, 22, 32, 32, 32, 42, 42, 42}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, dequantize_axes) -{ - Shape input_shape{4, 3}; - Shape scale_offset_shape{4}; - AxisSet quantization_axes{0}; - - auto input_type = element::u8; - auto output_type = element::f32; - - typedef uint8_t input_c_type; - typedef float output_c_type; - - auto X = make_shared(input_type, input_shape); - auto scale = op::Constant::create(output_type, scale_offset_shape, {2, 3, 4, 5}); - auto offset = op::Constant::create(input_type, scale_offset_shape, {10, 20, 30, 40}); - auto dequantize = make_shared(X, scale, offset, output_type, quantization_axes); - auto f = make_shared(dequantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = backend->create_tensor(input_type, input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{10, 11, 11, 21, 21, 22, 32, 32, 32, 42, 42, 42}); - // minus offset 10 10 10 20 20 20 30 30 30 40 40 40 - // equals 0 1 1 1 1 2 2 2 2 2 2 2 - // multiplied by scale 2 2 2 3 3 3 4 4 4 5 5 5 - // equals 0 2 
2 3 3 6 8 8 8 10 10 10 - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ((vector{0, 2, 2, 3, 3, 6, 8, 8, 8, 10, 10, 10}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, quantize_int8) -{ - Shape input_shape{4, 3}; - Shape scale_offset_shape; - AxisSet quantization_axes; - - auto input_type = element::f32; - auto output_type = element::i8; - - typedef float input_c_type; - typedef int8_t output_c_type; - - op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY; - - auto X = make_shared(input_type, input_shape); - auto scale = op::Constant::create(input_type, scale_offset_shape, {2}); - auto offset = op::Constant::create(output_type, scale_offset_shape, {1}); - auto quantize = - make_shared(X, scale, offset, output_type, quantization_axes, round_mode); - auto f = make_shared(quantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = backend->create_tensor(input_type, input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11}); - // divide by scale 2 2 2 2 2 2 2 2 2 2 2 2 - // equals (rounded) 0 -1 1 -2 2 -3 3 -4 4 -5 5 -6 - // plus offset 1 1 1 1 1 1 1 1 1 1 1 1 - // equals 1 0 2 -1 3 -2 4 -3 5 -4 6 -5 - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ((vector{1, 0, 2, -1, 3, -2, 4, -3, 5, -4, 6, -5}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, dequantize_int8) -{ - Shape input_shape{4, 3}; - Shape scale_offset_shape; - AxisSet quantization_axes; - - auto input_type = element::i8; - auto output_type = element::f32; - - typedef int8_t input_c_type; - typedef float output_c_type; - - auto X = make_shared(input_type, input_shape); - auto scale = op::Constant::create(output_type, scale_offset_shape, {2}); - auto offset = op::Constant::create(input_type, scale_offset_shape, {1}); - auto dequantize = make_shared(X, scale, offset, output_type, quantization_axes); - auto f 
= make_shared(dequantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = backend->create_tensor(input_type, input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{1, 0, 2, -1, 3, -2, 4, -3, 5, -4, 6, -5}); - // minus offset 1 1 1 1 1 1 1 1 1 1 1 1 - // equals 0 -1 1 -2 2 -3 3 -4 4 -5 5 -6 - // multiplied by scale 2 2 2 2 2 2 2 2 2 2 2 2 - // equals 0 -2 2 -4 4 -6 6 -8 8 -10 10 -12 - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ((vector{0, -2, 2, -4, 4, -6, 6, -8, 8, -10, 10, -12}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, quantize_clamp) -{ - Shape input_shape{4, 3}; - Shape scale_offset_shape; - AxisSet quantization_axes; - - auto input_type = element::f32; - auto output_type = element::i8; - - typedef float input_c_type; - typedef int8_t output_c_type; - - op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY; - - auto X = make_shared(input_type, input_shape); - auto scale = op::Constant::create(input_type, scale_offset_shape, {0.00001}); - auto offset = op::Constant::create(output_type, scale_offset_shape, {1}); - auto quantize = - make_shared(X, scale, offset, output_type, quantization_axes, round_mode); - auto f = make_shared(quantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = backend->create_tensor(input_type, input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11}); - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ( - (vector{1, -128, 127, -128, 127, -128, 127, -128, 127, -128, 127, -128}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_NEAREST_TOWARD_ZERO) -{ - Shape input_shape{4, 3}; - Shape scale_offset_shape; - AxisSet quantization_axes; - - auto input_type = element::f32; - auto output_type = element::i8; - - typedef float 
input_c_type; - typedef int8_t output_c_type; - - op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_ZERO; - - auto X = make_shared(input_type, input_shape); - auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); - auto offset = op::Constant::create(output_type, scale_offset_shape, {0}); - auto quantize = - make_shared(X, scale, offset, output_type, quantization_axes, round_mode); - auto f = make_shared(quantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = backend->create_tensor(input_type, input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); - // divide by scale 4 4 4 4 4 4 4 4 4 4 4 4 - // equals (rounded) 2 2 3 -2 -2 -3 3 3 4 -3 -3 -4 - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ((vector{2, 2, 3, -2, -2, -3, 3, 3, 4, -3, -3, -4}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_NEAREST_UPWARD) -{ - Shape input_shape{4, 3}; - Shape scale_offset_shape; - AxisSet quantization_axes; - - auto input_type = element::f32; - auto output_type = element::i8; - - typedef float input_c_type; - typedef int8_t output_c_type; - - op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_UPWARD; - - auto X = make_shared(input_type, input_shape); - auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); - auto offset = op::Constant::create(output_type, scale_offset_shape, {0}); - auto quantize = - make_shared(X, scale, offset, output_type, quantization_axes, round_mode); - auto f = make_shared(quantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = backend->create_tensor(input_type, input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); - // divide by scale 4 4 
4 4 4 4 4 4 4 4 4 4 - // equals (rounded) 2 3 3 -2 -2 -3 3 4 4 -3 -3 -4 - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ((vector{2, 3, 3, -2, -2, -3, 3, 4, 4, -3, -3, -4}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_NEAREST_DOWNWARD) -{ - Shape input_shape{4, 3}; - Shape scale_offset_shape; - AxisSet quantization_axes; - - auto input_type = element::f32; - auto output_type = element::i8; - - typedef float input_c_type; - typedef int8_t output_c_type; - - op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_DOWNWARD; - - auto X = make_shared(input_type, input_shape); - auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); - auto offset = op::Constant::create(output_type, scale_offset_shape, {0}); - auto quantize = - make_shared(X, scale, offset, output_type, quantization_axes, round_mode); - auto f = make_shared(quantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = backend->create_tensor(input_type, input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); - // divide by scale 4 4 4 4 4 4 4 4 4 4 4 4 - // equals (rounded) 2 2 3 -2 -3 -3 3 3 4 -3 -4 -4 - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ((vector{2, 2, 3, -2, -3, -3, 3, 3, 4, -3, -4, -4}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_NEAREST_TOWARD_EVEN) -{ - Shape input_shape{4, 3}; - Shape scale_offset_shape; - AxisSet quantization_axes; - - auto input_type = element::f32; - auto output_type = element::i8; - - typedef float input_c_type; - typedef int8_t output_c_type; - - op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN; - - auto X = make_shared(input_type, input_shape); - auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); - auto offset = op::Constant::create(output_type, 
scale_offset_shape, {0}); - auto quantize = - make_shared(X, scale, offset, output_type, quantization_axes, round_mode); - auto f = make_shared(quantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = backend->create_tensor(input_type, input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); - // divide by scale 4 4 4 4 4 4 4 4 4 4 4 4 - // equals (rounded) 2 2 3 -2 -2 -3 3 4 4 -3 -4 -4 - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ((vector{2, 2, 3, -2, -2, -3, 3, 4, 4, -3, -4, -4}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_TOWARD_INFINITY) -{ - Shape input_shape{4, 3}; - Shape scale_offset_shape; - AxisSet quantization_axes; - - auto input_type = element::f32; - auto output_type = element::i8; - - typedef float input_c_type; - typedef int8_t output_c_type; - - op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_TOWARD_INFINITY; - - auto X = make_shared(input_type, input_shape); - auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); - auto offset = op::Constant::create(output_type, scale_offset_shape, {0}); - auto quantize = make_shared( - X, - scale, - offset, - output_type, - quantization_axes, - static_cast(static_cast(round_mode))); - auto f = make_shared(quantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = backend->create_tensor(input_type, input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); - // divide by scale 4 4 4 4 4 4 4 4 4 4 4 4 - // equals (rounded) 3 3 3 -3 -3 -3 4 4 4 -4 -4 -4 - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ((vector{3, 3, 3, -3, -3, -3, 4, 4, 4, -4, -4, -4}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_TOWARD_ZERO) -{ - Shape 
input_shape{4, 3}; - Shape scale_offset_shape; - AxisSet quantization_axes; - - auto input_type = element::f32; - auto output_type = element::i8; - - typedef float input_c_type; - typedef int8_t output_c_type; - - op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_TOWARD_ZERO; - - auto X = make_shared(input_type, input_shape); - auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); - auto offset = op::Constant::create(output_type, scale_offset_shape, {0}); - auto quantize = make_shared( - X, - scale, - offset, - output_type, - quantization_axes, - static_cast(static_cast(round_mode))); - auto f = make_shared(quantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = backend->create_tensor(input_type, input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); - // divide by scale 4 4 4 4 4 4 4 4 4 4 4 4 - // equals (rounded) 2 2 2 -2 -2 -2 3 3 3 -3 -3 -3 - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ((vector{2, 2, 2, -2, -2, -2, 3, 3, 3, -3, -3, -3}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_UP) -{ - Shape input_shape{4, 3}; - Shape scale_offset_shape; - AxisSet quantization_axes; - - auto input_type = element::f32; - auto output_type = element::i8; - - typedef float input_c_type; - typedef int8_t output_c_type; - - op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_UP; - - auto X = make_shared(input_type, input_shape); - auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); - auto offset = op::Constant::create(output_type, scale_offset_shape, {0}); - auto quantize = - make_shared(X, scale, offset, output_type, quantization_axes, round_mode); - auto f = make_shared(quantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = backend->create_tensor(input_type, 
input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); - // divide by scale 4 4 4 4 4 4 4 4 4 4 4 4 - // equals (rounded) 3 3 3 -2 -2 -2 4 4 4 -3 -3 -3 - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ((vector{3, 3, 3, -2, -2, -2, 4, 4, 4, -3, -3, -3}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, quantize_ROUND_DOWN) -{ - Shape input_shape{4, 3}; - Shape scale_offset_shape; - AxisSet quantization_axes; - - auto input_type = element::f32; - auto output_type = element::i8; - - typedef float input_c_type; - typedef int8_t output_c_type; - - op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_DOWN; - - auto X = make_shared(input_type, input_shape); - auto scale = op::Constant::create(input_type, scale_offset_shape, {4}); - auto offset = op::Constant::create(output_type, scale_offset_shape, {0}); - auto quantize = - make_shared(X, scale, offset, output_type, quantization_axes, round_mode); - auto f = make_shared(quantize, op::ParameterVector{X}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto x = backend->create_tensor(input_type, input_shape); - auto y = backend->create_tensor(output_type, input_shape); - - copy_data(x, vector{9, 10, 11, -9, -10, -11, 13, 14, 15, -13, -14, -15}); - // divide by scale 4 4 4 4 4 4 4 4 4 4 4 4 - // equals (rounded) 2 2 2 -3 -3 -3 3 3 3 -4 -4 -4 - - backend->call_with_validate(f, {y}, {x}); - EXPECT_EQ((vector{2, 2, 2, -3, -3, -3, 3, 3, 3, -4, -4, -4}), - read_vector(y)); -} - -NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_bprop) -{ - Shape sca{1}; - Shape vec{1, 1, 1, 2}; - double eps = 1.0e-04; - - auto g = std::make_shared(element::f32, sca); - auto b = std::make_shared(element::f32, sca); - auto input = std::make_shared(element::f32, vec); - auto bn_fp = std::make_shared(input, g, b, eps); - auto bnorm = std::make_shared(bn_fp, 0); - auto mean = std::make_shared(bn_fp, 1); - auto var 
= std::make_shared(bn_fp, 2); - - auto delta = std::make_shared(element::f32, vec); - auto bn_bp = - std::make_shared(bnorm, g, b, mean, var, delta, eps); - auto dx = std::make_shared(bn_bp, 0); - - std::vector> args = { - {1.0f}, // gamma - {1.0f}, // beta - {1.1f, 1.0f}, // x - {1.0f, 1.0f}, // dy - }; - - auto func = std::make_shared(dx, op::ParameterVector{g, b, input, delta}); - auto results = execute(func, args, "${BACKEND_NAME}"); - EXPECT_TRUE(test::all_close_f(std::vector{350.957, -388.67}, results.at(0))); -} - -NGRAPH_TEST(${BACKEND_NAME}, batchnorm_fprop_bprop_2step) -{ - Shape sca{1}; - Shape vec{1, 1, 1, 2}; - double eps = 1.0e-04; - - auto g = std::make_shared(element::f32, sca); - auto b = std::make_shared(element::f32, sca); - auto input = std::make_shared(element::f32, vec); - auto bn_fp = std::make_shared(input, g, b, eps); - auto bnorm = std::make_shared(bn_fp, 0); - auto mean = std::make_shared(bn_fp, 1); - auto var = std::make_shared(bn_fp, 2); - - auto func_bn = - std::make_shared(NodeVector{bnorm, mean, var}, op::ParameterVector{g, b, input}); - - std::vector> args = { - {1.0f}, // gamma - {1.0f}, // beta - {1.1f, 1.0f}, // x - }; - auto results = execute(func_bn, args, "${BACKEND_NAME}"); - - g = std::make_shared(element::f32, sca); - b = std::make_shared(element::f32, sca); - auto bn_output = std::make_shared(element::f32, vec); - auto m = std::make_shared(element::f32, sca); - auto v = std::make_shared(element::f32, sca); - auto delta = std::make_shared(element::f32, vec); - auto bn_bp = std::make_shared(bn_output, g, b, m, v, delta, eps); - auto dx = std::make_shared(bn_bp, 0); - - args.pop_back(); // remove x - args.push_back(results.at(0)); // bn_output - args.push_back(results.at(1)); // m - args.push_back(results.at(2)); // v - args.push_back({1.0f, 1.0f}); // dy - - auto func = std::make_shared(dx, op::ParameterVector{g, b, bn_output, m, v, delta}); - results = execute(func, args, "${BACKEND_NAME}"); - 
EXPECT_TRUE(test::all_close_f(std::vector{350.957, -388.67}, results.at(0))); -} - -NGRAPH_TEST(${BACKEND_NAME}, shape_of_scalar) -{ - Shape input_shape{}; - Shape output_shape{0}; - - auto A = std::make_shared(element::f32, input_shape); - auto f = std::make_shared(std::make_shared(A), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::f32, input_shape); - copy_data(a, vector{0}); - auto result = backend->create_tensor(element::u64, output_shape); - - backend->call_with_validate(f, {result}, {a}); - vector expected{}; - EXPECT_EQ(expected, read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, shape_of_vector) -{ - Shape input_shape{2}; - Shape output_shape{1}; - - auto A = std::make_shared(element::f32, input_shape); - auto f = std::make_shared(std::make_shared(A), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::f32, input_shape); - copy_data(a, vector(2, 0)); - auto result = backend->create_tensor(element::u64, output_shape); - - backend->call_with_validate(f, {result}, {a}); - vector expected{2}; - EXPECT_EQ(expected, read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, shape_of_matrix) -{ - Shape input_shape{2, 4}; - Shape output_shape{2}; - - auto A = std::make_shared(element::f32, input_shape); - auto f = std::make_shared(std::make_shared(A), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::f32, input_shape); - copy_data(a, vector(2 * 4, 0)); - auto result = backend->create_tensor(element::u64, output_shape); - - backend->call_with_validate(f, {result}, {a}); - vector expected{2, 4}; - EXPECT_EQ(expected, read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, shape_of_5d) -{ - Shape input_shape{2, 4, 8, 16, 32}; - Shape output_shape{5}; - - auto A = std::make_shared(element::f32, input_shape); - auto 
f = std::make_shared(std::make_shared(A), op::ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - auto a = backend->create_tensor(element::f32, input_shape); - copy_data(a, vector(2 * 4 * 8 * 16 * 32, 0)); - auto result = backend->create_tensor(element::u64, output_shape); - - backend->call_with_validate(f, {result}, {a}); - vector expected{2, 4, 8, 16, 32}; - EXPECT_EQ(expected, read_vector(result)); -} diff --git a/test/cpu_fusion.cpp-41c1ba06 b/test/cpu_fusion.cpp-41c1ba06 deleted file mode 100644 index e377ab0f432..00000000000 --- a/test/cpu_fusion.cpp-41c1ba06 +++ /dev/null @@ -1,3132 +0,0 @@ -//***************************************************************************** -// Copyright 2017-2018 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-//***************************************************************************** - -#include -#include -#include -#include -#include - -#include "gtest/gtest.h" -#include "ngraph/autodiff/adjoints.hpp" -#include "ngraph/file_util.hpp" -#include "ngraph/graph_util.hpp" -#include "ngraph/log.hpp" -#include "ngraph/ngraph.hpp" -#include "ngraph/op/batch_norm.hpp" -#include "ngraph/op/concat.hpp" -#include "ngraph/op/get_output_element.hpp" -#include "ngraph/op/max_pool.hpp" -#include "ngraph/op/negative.hpp" -#include "ngraph/op/parameter.hpp" -#include "ngraph/op/relu.hpp" -#include "ngraph/op/sigmoid.hpp" -#include "ngraph/op/sum.hpp" -#include "ngraph/op/tanh.hpp" -#include "ngraph/pass/algebraic_simplification.hpp" -#include "ngraph/pass/core_fusion.hpp" -#include "ngraph/pass/graph_rewrite.hpp" -#include "ngraph/pass/manager.hpp" -#include "ngraph/pass/reshape_elimination.hpp" -#include "ngraph/pass/visualize_tree.hpp" -#include "ngraph/pattern/matcher.hpp" -#include "ngraph/pattern/op/label.hpp" -#include "ngraph/pattern/op/skip.hpp" -#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp" -#include "ngraph/runtime/cpu/cpu_tensor_view.hpp" -#include "ngraph/runtime/cpu/op/batch_dot.hpp" -#include "ngraph/runtime/cpu/op/batch_norm_relu.hpp" -#include "ngraph/runtime/cpu/op/bounded_relu.hpp" -#include "ngraph/runtime/cpu/op/conv_add.hpp" -#include "ngraph/runtime/cpu/op/conv_bias.hpp" -#include "ngraph/runtime/cpu/op/conv_relu.hpp" -#include "ngraph/runtime/cpu/op/convert_layout.hpp" -#include "ngraph/runtime/cpu/op/group_conv.hpp" -#include "ngraph/runtime/cpu/op/group_conv_bias.hpp" -#include "ngraph/runtime/cpu/op/loop_kernel.hpp" -#include "ngraph/runtime/cpu/op/lstm.hpp" -#include "ngraph/runtime/cpu/op/matmul_bias.hpp" -#include "ngraph/runtime/cpu/op/rnn.hpp" -#include "ngraph/runtime/cpu/op/sigmoid_mul.hpp" -#include "ngraph/runtime/cpu/pass/cpu_concat_inputs.hpp" -#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp" -#include 
"ngraph/runtime/cpu/pass/cpu_loop_kernel_fusion.hpp" -#include "ngraph/runtime/cpu/pass/cpu_mat_fusion.hpp" -#include "ngraph/runtime/cpu/pass/cpu_post_layout_optimizations.hpp" -#include "ngraph/runtime/cpu/pass/cpu_rnn_fusion.hpp" -#include "ngraph/runtime/cpu/pass/cpu_workspace_insertion.hpp" -#include "ngraph/serializer.hpp" -#include "ngraph/util.hpp" -#include "nlohmann/json.hpp" -#include "util/all_close.hpp" -#include "util/autodiff/backprop_function.hpp" -#include "util/autodiff/numeric_compare.hpp" -#include "util/matcher.hpp" -#include "util/random.hpp" -#include "util/random.hpp" -#include "util/test_tools.hpp" - -using namespace ngraph; -using namespace std; - -TEST(cpu_fusion, gemm_pattern) -{ - Shape shape_w{2, 4}; - Shape shape_x{4, 1}; - Shape shape_b{1}; - auto A = make_shared(element::f32, shape_w); - auto B = make_shared(element::f32, shape_x); - auto C = make_shared(element::f32, shape_b); - - auto dot = make_shared(A, B); - auto broadcast = make_shared(C, dot->get_shape(), AxisSet{0}); - auto add = dot + broadcast; - - auto W = std::make_shared(A); - auto x = std::make_shared(B); - - auto reshape_pred = [](std::shared_ptr n) { - return static_cast(std::dynamic_pointer_cast(n)); - }; - - auto skip_w = std::make_shared(W, reshape_pred); - auto skip_x = std::make_shared(x, reshape_pred); - - auto pdot = make_shared(skip_w, skip_x); - auto b = std::make_shared(C); - auto pbroadcast = make_shared(b, dot->get_shape(), AxisSet{0}); - auto padd = pdot + pbroadcast; - - TestMatcher n(nullptr); - ASSERT_TRUE(n.match(padd, add)); - ASSERT_EQ(n.get_pattern_map()[W], A); - ASSERT_EQ(n.get_pattern_map()[x], B); - ASSERT_EQ(n.get_pattern_map()[b], C); - - auto reshape_w = make_shared(A, AxisVector{1, 0}, W->get_shape()); - auto reshape_x = make_shared(B, AxisVector{1, 0}, x->get_shape()); - auto re_dot = make_shared(reshape_w, reshape_x); - auto re_add = re_dot + broadcast; - ASSERT_TRUE(n.match(padd, re_add)); - ASSERT_EQ(n.get_pattern_map()[W], A); - 
ASSERT_EQ(n.get_pattern_map()[x], B); - ASSERT_EQ(n.get_pattern_map()[b], C); - - auto cg = make_shared( - W, x, C, W->get_shape(), x->get_shape(), false, false, AxisSet{0}); -} - -TEST(cpu_fusion, gemm_cpu_broadcast_row) -{ - Shape shapeA{3, 2}; - Shape shapeB{2, 3}; - Shape shapeC{2, 2}; - auto A = make_shared(element::f32, shapeA); - auto B = make_shared(element::f32, shapeB); - - auto bias = op::Constant::create(element::f32, Shape{2}, std::vector{2.0f, 3.0f}); - - auto cg = make_shared( - A, B, bias, A->get_shape(), B->get_shape(), true, true, AxisSet{0}); - - auto f = make_shared(cg, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("CPU"); - - shared_ptr a = backend->create_tensor(element::f32, shapeA); - shared_ptr b = backend->create_tensor(element::f32, shapeB); - shared_ptr result = backend->create_tensor(element::f32, shapeC); - - vector dataA{1.0f, 4.0f, 1.0f, 4.0f, 1.0f, 4.0f}; - vector dataB{3.0f, 3.0f, 3.0f, 9.0f, 9.0f, 9.0f}; - copy_data(a, dataA); - copy_data(b, dataB); - - backend->call_with_validate(f, {result}, {a, b}); - vector expected{11, 30, 38, 111}; - EXPECT_EQ(read_vector(result), expected); -} - -TEST(cpu_fusion, gemm_cpu_broadcast_column) -{ - Shape shapeA{3, 2}; - Shape shapeB{2, 3}; - Shape shapeC{2, 2}; - auto A = make_shared(element::f32, shapeA); - auto B = make_shared(element::f32, shapeB); - - auto bias = op::Constant::create(element::f32, Shape{2}, std::vector{2.0f, 3.0f}); - - auto cg = make_shared( - A, B, bias, A->get_shape(), B->get_shape(), true, true, AxisSet{1}); - - auto f = make_shared(cg, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("CPU"); - - shared_ptr a = backend->create_tensor(element::f32, shapeA); - shared_ptr b = backend->create_tensor(element::f32, shapeB); - shared_ptr result = backend->create_tensor(element::f32, shapeC); - - vector dataA{1.0f, 4.0f, 1.0f, 4.0f, 1.0f, 4.0f}; - vector dataB{3.0f, 3.0f, 3.0f, 9.0f, 9.0f, 9.0f}; - copy_data(a, dataA); - 
copy_data(b, dataB); - - backend->call_with_validate(f, {result}, {a, b}); - vector expected{11, 29, 39, 111}; - EXPECT_EQ(read_vector(result), expected); -} - -TEST(cpu_fusion, gemm_cpu_broadcast_matrix) -{ - Shape shapeA{3, 2}; - Shape shapeB{2, 3}; - Shape shapeC{2, 2}; - auto A = make_shared(element::f32, shapeA); - auto B = make_shared(element::f32, shapeB); - - auto reshape_w = make_shared(A, AxisVector{1, 0}, Shape{2, 3}); - auto reshape_x = make_shared(B, AxisVector{1, 0}, Shape{3, 2}); - - auto one = op::Constant::create(element::f32, Shape{}, std::vector{1.0f}); - - auto broadcast = make_shared(one, shapeC, AxisSet{0, 1}); - auto cg = make_shared( - A, B, one, A->get_shape(), B->get_shape(), true, true, AxisSet{0, 1}); - - auto f = make_shared(cg, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("CPU"); - - shared_ptr a = backend->create_tensor(element::f32, shapeA); - shared_ptr b = backend->create_tensor(element::f32, shapeB); - shared_ptr result = backend->create_tensor(element::f32, shapeC); - - vector dataA{1.0f, 4.0f, 1.0f, 4.0f, 1.0f, 4.0f}; - vector dataB{3.0f, 3.0f, 3.0f, 9.0f, 9.0f, 9.0f}; - copy_data(a, dataA); - copy_data(b, dataB); - - backend->call_with_validate(f, {result}, {a, b}); - vector expected{10, 28, 37, 109}; - ASSERT_TRUE(read_vector(result) == expected); -} - -TEST(cpu_fusion, gemm_cpu_no_bias) -{ - auto shapeA = Shape{3, 2}; - auto shapeB = Shape{2, 3}; - auto shapeC = Shape{2, 2}; - auto A = make_shared(element::f32, shapeA); - auto B = make_shared(element::f32, shapeB); - - auto reshape_w = make_shared(A, AxisVector{1, 0}, Shape{2, 3}); - auto reshape_x = make_shared(B, AxisVector{1, 0}, Shape{3, 2}); - - auto cg = - make_shared(A, B, nullptr, A->get_shape(), B->get_shape(), true, true); - - auto f = make_shared(cg, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("CPU"); - - shared_ptr a = backend->create_tensor(element::f32, shapeA); - shared_ptr b = 
backend->create_tensor(element::f32, shapeB); - shared_ptr result = backend->create_tensor(element::f32, shapeC); - - vector dataA{1.0f, 4.0f, 1.0f, 4.0f, 1.0f, 4.0f}; - vector dataB{3.0f, 3.0f, 3.0f, 9.0f, 9.0f, 9.0f}; - copy_data(a, dataA); - copy_data(b, dataB); - - backend->call_with_validate(f, {result}, {a, b}); - vector expected{9, 27, 36, 108}; - ASSERT_TRUE(read_vector(result) == expected); -} - -TEST(cpu_fusion, cpu_fusion_pass_basic) -{ - Shape shape{}; - Shape shape_w{2, 4}; - Shape shape_x{4, 1}; - Shape shape_b{1}; - auto A = make_shared(element::f32, shape_w); - auto B = make_shared(element::f32, shape_x); - auto C = make_shared(element::f32, shape_b); - - auto dot = make_shared(A, B); - auto broadcast = make_shared(C, dot->get_shape(), AxisSet{0}); - auto add = dot + broadcast; - auto graph = make_shared(add); - pass::Manager pass_manager; - pass_manager.register_pass( - runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); - auto func = make_shared(graph, op::ParameterVector{A, B, C}); - pass_manager.run_passes(func); - ASSERT_NE(std::dynamic_pointer_cast(graph->get_argument(0)), nullptr); -} - -TEST(cpu_fusion, commutative_matmul_bias) -{ - Shape shape{}; - Shape shape_w{2, 4}; - Shape shape_x{4, 1}; - Shape shape_b{1}; - auto A = make_shared(element::f32, shape_w); - auto B = make_shared(element::f32, shape_x); - auto C = make_shared(element::f32, shape_b); - - auto dot = make_shared(A, B); - auto broadcast = make_shared(C, dot->get_shape(), AxisSet{0}); - auto add = broadcast + dot; - auto graph = make_shared(add); - pass::Manager pass_manager; - pass_manager.register_pass( - runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); - auto func = make_shared(graph, op::ParameterVector{A, B, C}); - pass_manager.run_passes(func); - ASSERT_NE(std::dynamic_pointer_cast(graph->get_argument(0)), nullptr); -} - -TEST(cpu_fusion, cpu_fusion_pass_matmul_bias) -{ - Shape shape_w{2, 4}; - Shape shape_x{4, 1}; - Shape shape_b{1}; - auto W = make_shared(element::f32, 
shape_w); - auto x = make_shared(element::f32, shape_x); - auto b = make_shared(element::f32, shape_b); - - auto mmb = std::make_shared( - W, x, nullptr, W->get_shape(), x->get_shape(), false, false); - auto broadcast = std::make_shared(b, mmb->get_shape(), AxisSet{0}); - auto add = mmb + broadcast; - - auto graph = make_shared(add); - pass::Manager pass_manager; - pass_manager.register_pass( - runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); - auto func = make_shared(graph, op::ParameterVector{W, x, b}); - pass_manager.run_passes(func); - auto gmm = graph->get_argument(0); - ASSERT_TRUE(std::dynamic_pointer_cast(gmm)); - ASSERT_EQ(gmm->get_argument(2), b); -} - -TEST(cpu_fusion, cpu_fusion_pass_matmul_no_bias) -{ - Shape shape_w{4, 2}; - Shape shape_x{1, 4}; - auto W = make_shared(element::f32, shape_w); - auto x = make_shared(element::f32, shape_x); - - auto reshape_w = std::make_shared(W, AxisVector{1, 0}, Shape{2, 4}); - auto reshape_x = std::make_shared(x, AxisVector{1, 0}, Shape{4, 1}); - auto re_dot = make_shared(reshape_w, reshape_x); - auto graph = make_shared(re_dot); - - pass::Manager pass_manager; - pass_manager.register_pass( - runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); - auto func = make_shared(graph, op::ParameterVector{W, x}); - pass_manager.run_passes(func); - size_t mmb = count_ops_of_type(func); - ASSERT_EQ(mmb, 1); -} - -TEST(cpu_fusion, gemm_mlp) -{ - const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json"); - const string json_string = file_util::read_file_to_string(json_path); - stringstream ss(json_string); - shared_ptr func = ngraph::deserialize(ss); - pass::Manager pass_manager; - pass_manager.register_pass( - runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); - pass_manager.run_passes(func); - auto mmbs = count_ops_of_type(func); - ASSERT_EQ(mmbs, 3); -} - -TEST(cpu_fusion, fuse_fprop_bn) -{ - pass::Manager pass_manager; - pass_manager.register_pass("bn_fprop_before_fusion.png"); - 
pass_manager.register_pass(); - pass_manager.register_pass( - runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); - pass_manager.register_pass("bn_fprop_after_fusion.png"); - const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/bn_fprop_b2c3h2w2.json"); - const string json_string = file_util::read_file_to_string(json_path); - stringstream ss(json_string); - shared_ptr func = ngraph::deserialize(ss); - pass_manager.run_passes(func); - size_t ccg = count_ops_of_type(func); - ASSERT_EQ(ccg, 1); -} - -TEST(cpu_fusion, zero_padded_reshaped_conv) -{ - auto X = make_shared(element::f32, Shape{1, 2, 2, 1}); - auto F = make_shared(element::f32, Shape{1, 1, 1, 1}); - - auto pad_value = op::Constant::create(element::f32, Shape{}, std::vector{0.0f}); - - auto pad = - make_shared(X, pad_value, Shape{0, 1, 0, 0}, Shape{0, 0, 1, 0}, Shape{0, 0, 0, 0}); - - auto reshape = make_shared(pad, AxisVector{0, 3, 1, 2}, Shape{1, 1, 3, 3}); - - auto conv = make_shared(reshape, - F, - Strides{1, 1}, - Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, - Strides{1, 1}); - - auto func = make_shared(conv, op::ParameterVector{X, F}); - - ASSERT_EQ(count_ops_of_type(func), 1); - - auto backend = runtime::Backend::create("CPU"); - backend->compile(func); - - ASSERT_EQ(count_ops_of_type(func), 0); -} - -TEST(cpu_fusion, zero_padded_conv) -{ - auto X = make_shared(element::f32, Shape{1, 1, 2, 2}); - auto F = make_shared(element::f32, Shape{1, 1, 1, 1}); - - auto pad_value = op::Constant::create(element::f32, Shape{}, std::vector{0.0f}); - - auto pad = - make_shared(X, pad_value, Shape{0, 0, 0, 1}, Shape{0, 0, 1, 0}, Shape{0, 0, 0, 0}); - - auto conv = make_shared(pad, - F, - Strides{1, 1}, - Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, - Strides{1, 1}); - - auto func = make_shared(conv, op::ParameterVector{X, F}); - - ASSERT_EQ(count_ops_of_type(func), 1); - - auto backend = runtime::Backend::create("CPU"); - backend->compile(func); - - 
ASSERT_EQ(count_ops_of_type(func), 0); -} - -TEST(cpu_fusion, non_zero_padded_conv) -{ - auto X = make_shared(element::f32, Shape{1, 1, 2, 2}); - auto F = make_shared(element::f32, Shape{1, 1, 1, 1}); - - auto pad_value = op::Constant::create(element::f32, Shape{}, std::vector{1.0f}); - - auto pad = - make_shared(X, pad_value, Shape{0, 0, 0, 1}, Shape{0, 0, 1, 0}, Shape{0, 0, 0, 0}); - - auto conv = make_shared(pad, - F, - Strides{1, 1}, - Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, - Strides{1, 1}); - - auto func = make_shared(conv, op::ParameterVector{X, F}); - - ASSERT_EQ(count_ops_of_type(func), 1); - - auto backend = runtime::Backend::create("CPU"); - backend->compile(func); - - ASSERT_EQ(count_ops_of_type(func), 1); -} - -TEST(cpu_fusion, zero_padded_conv_backprop_filters) -{ - auto X = make_shared(element::f32, Shape{1, 1, 2, 2}); - auto F = make_shared(element::f32, Shape{1, 1, 2, 2}); - - auto pad_value = op::Constant::create(element::f32, Shape{}, std::vector{0.0f}); - - auto pad = - make_shared(X, pad_value, Shape{0, 0, 0, 1}, Shape{0, 0, 1, 0}, Shape{0, 0, 0, 0}); - - auto conv = make_shared(pad, - Shape{1, 1, 2, 2}, - F, - Strides{1, 1}, - Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, - Strides{1, 1}); - - auto func = make_shared(conv, op::ParameterVector{X, F}); - - ASSERT_EQ(count_ops_of_type(func), 1); - - auto backend = runtime::Backend::create("CPU"); - backend->compile(func); - - ASSERT_EQ(count_ops_of_type(func), 0); -} - -TEST(cpu_fusion, fuse_conv_bias) -{ - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.register_pass( - runtime::cpu::pass::CPUFusion::DIFFERENTIABLE_FUSIONS); - const string json_path = file_util::path_join(SERIALIZED_ZOO, "conv_bias.json"); - const string json_string = file_util::read_file_to_string(json_path); - stringstream ss(json_string); - shared_ptr func = ngraph::deserialize(ss); - pass_manager.run_passes(func); - size_t cb = count_ops_of_type(func); - 
ASSERT_GT(cb, 0); -} - -struct ConvolutionBiasTestData -{ - size_t n{0}; - size_t c{0}; - size_t filter{0}; - size_t kernel_size{0}; - size_t w{0}; - size_t h{0}; - shared_ptr data_val; - shared_ptr weights_val; - shared_ptr bias_val; - shared_ptr result_val; - shared_ptr delta_val; - shared_ptr d_data_val; - shared_ptr d_weights_val; - shared_ptr d_bias_val; - vector expected_result_val; - vector expected_d_data_val; - vector expected_d_weights_val; - vector expected_d_bias_val; - - Shape data_shape; - Shape weights_shape; - Shape bias_shape; - Shape result_shape; - shared_ptr data; - shared_ptr weights; - shared_ptr bias; - shared_ptr delta; - - void n1c1h3w3(runtime::Backend* backend) - { - n = 1; - c = 1; - filter = 1; - kernel_size = 3; - w = 3; - h = w; - - data_shape = Shape{n, c, h, w}; - data = make_shared(element::f32, data_shape); - weights_shape = Shape{filter, c, kernel_size, kernel_size}; - weights = make_shared(element::f32, weights_shape); - bias_shape = Shape{filter}; - bias = make_shared(element::f32, bias_shape); - result_shape = Shape{n, filter, 1, 1}; - - data_val = backend->create_tensor(element::f32, data_shape); - copy_data(data_val, - vector{-0.67765152f, - 0.10073948f, - 0.57595438f, - -0.3469252f, - -0.22134334f, - -1.80471897f, - -0.80642909f, - 1.22033095f, - 2.23235631f}); - weights_val = backend->create_tensor(element::f32, weights_shape); - copy_data(weights_val, - vector{0.20070229f, - -0.54968649f, - -0.19819015f, - -0.38577855f, - 1.37109005f, - -0.23789984f, - 0.14867957f, - -0.49851316f, - -0.84815776f}); - bias_val = backend->create_tensor(element::f32, bias_shape); - copy_data(bias_val, vector{0.07811152f}); - - result_val = backend->create_tensor(element::f32, result_shape); - copy_data(result_val, vector{0}); - - delta = make_shared(element::f32, result_shape); - delta_val = backend->create_tensor(element::f32, result_shape); - copy_data(delta_val, vector{-2.58936238f}); - - d_data_val = backend->create_tensor(element::f32, 
data_shape); - copy_data(d_data_val, vector{0, 0, 0, 0, 0, 0, 0, 0, 0}); - - d_weights_val = backend->create_tensor(element::f32, weights_shape); - copy_data(d_weights_val, vector{0, 0, 0, 0, 0, 0, 0, 0, 0}); - - d_bias_val = backend->create_tensor(element::f32, bias_shape); - copy_data(d_bias_val, vector{0}); - - expected_result_val = vector{-2.58936238f}; - expected_d_data_val = vector{-0.51969099f, - 1.42333758f, - 0.5131861f, - 0.99892044f, - -3.5502491f, - 0.61600888f, - -0.3849853f, - 1.29083121f, - 2.19618773f}; - expected_d_weights_val = vector{1.7546854f, - -0.26085103f, - -1.49135458f, - 0.89831507f, - 0.57313812f, - 4.67307138f, - 2.08813715f, - -3.15987897f, - -5.7803793f}; - expected_d_bias_val = vector{-2.58936238f}; - } -}; - -TEST(cpu_fusion, conv_bias_fprop_n1c1h3w3) -{ - auto backend = runtime::Backend::create("CPU"); - - ConvolutionBiasTestData conv_test; - conv_test.n1c1h3w3(backend.get()); - - auto convolution = make_shared(conv_test.data, conv_test.weights); - auto convolution_bias = make_shared(convolution, conv_test.bias); - - auto f = make_shared( - convolution_bias, op::ParameterVector{conv_test.data, conv_test.weights, conv_test.bias}); - - backend->call_with_validate( - f, {conv_test.result_val}, {conv_test.data_val, conv_test.weights_val, conv_test.bias_val}); - auto result_vec = read_vector(conv_test.result_val); - - EXPECT_TRUE( - test::all_close(conv_test.expected_result_val, read_vector(conv_test.result_val))); -} - -TEST(cpu_fusion, conv_bias_bprop_n1c1h3w3) -{ - auto backend = runtime::Backend::create("CPU"); - - ConvolutionBiasTestData conv_test; - conv_test.n1c1h3w3(backend.get()); - - auto convolution = make_shared(conv_test.data, conv_test.weights); - auto convolution_bias = make_shared(convolution, conv_test.bias); - - auto f = make_shared( - convolution_bias, op::ParameterVector{conv_test.data, conv_test.weights, conv_test.bias}); - - ngraph::autodiff::Adjoints adjoints(NodeVector{convolution_bias}, 
NodeVector{conv_test.delta}); - - auto d_data = adjoints.backprop_node(conv_test.data); - auto d_weights = adjoints.backprop_node(conv_test.weights); - auto d_bias = adjoints.backprop_node(conv_test.bias); - - auto df = make_shared( - NodeVector{d_data, d_weights, d_bias}, - op::ParameterVector{conv_test.data, conv_test.weights, conv_test.bias, conv_test.delta}); - backend->call_with_validate( - df, - {conv_test.d_data_val, conv_test.d_weights_val, conv_test.d_bias_val}, - {conv_test.data_val, conv_test.weights_val, conv_test.bias_val, conv_test.delta_val}); - - EXPECT_TRUE( - test::all_close(conv_test.expected_d_data_val, read_vector(conv_test.d_data_val))); - EXPECT_TRUE(test::all_close(conv_test.expected_d_weights_val, - read_vector(conv_test.d_weights_val))); - EXPECT_TRUE( - test::all_close(conv_test.expected_d_bias_val, read_vector(conv_test.d_bias_val))); -} - -TEST(cpu_fusion, conv_bias_bprop) -{ - Shape shape{2, 2, 1, 1}; - auto data_batch = std::make_shared(element::f32, shape); - auto filters = std::make_shared(element::f32, shape); - auto delta = std::make_shared(element::f32, shape); - auto bias = make_shared(element::f32, Shape{shape[0]}); - auto pbroadcast = std::make_shared(bias, shape, AxisSet{1, 2, 3}); - auto conv = std::make_shared(data_batch, filters); - auto conv_bias = std::make_shared(conv, pbroadcast); - - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.register_pass("conv_bias_bprop_fusion"); - auto f = make_shared(conv_bias, op::ParameterVector{data_batch, filters, bias}); - - ngraph::autodiff::Adjoints adjoints(NodeVector{conv_bias}, NodeVector{delta}); - - auto d_data = adjoints.backprop_node(data_batch); - auto d_weights = adjoints.backprop_node(filters); - auto d_bias = adjoints.backprop_node(bias); - - auto df = make_shared(NodeVector{d_data, d_weights, d_bias}, - op::ParameterVector{data_batch, filters, bias, delta}); - - pass_manager.run_passes(df); - size_t ccg = count_ops_of_type(df); - ASSERT_EQ(ccg, 
1); -} - -TEST(cpu_fusion, batchnorm_fprop_relu_b1c2h2w2) -{ - auto input_shape = Shape{1, 2, 2, 2}; - auto input = make_shared(element::f32, input_shape); - auto mean_shape = Shape{2}; - auto var_shape = Shape{2}; - auto gamma_shape = Shape{2}; - auto gamma = make_shared(element::f32, gamma_shape); - auto beta_shape = Shape{2}; - auto beta = make_shared(element::f32, beta_shape); - double eps = 0.001; - auto shape_r = Shape{1, 2, 2, 2}; - auto bn = make_shared(input, gamma, beta, eps); - - auto output_rt = std::make_shared(bn, 0); - // Note, op::Splice is used to break Relu(BatchNorm) fusion - // otherwise we will be comparing two BatchNormRelus - // Unfortunately, we can't use INTERPRETER for - // verifying the results as it doesn't implement - // BatchNorm op. - auto slice = - std::make_shared(output_rt, Coordinate{0, 0, 0, 0}, Coordinate{1, 2, 2, 2}); - auto output_relu = std::make_shared(slice); - auto mean_rt = std::make_shared(bn, 1); - auto variance_rt = std::make_shared(bn, 2); - - auto bn_relu = make_shared(input, gamma, beta, eps); - auto output_rt_bnr = std::make_shared(bn_relu, 0); - auto mean_rt_bnr = std::make_shared(bn_relu, 1); - auto variance_rt_bnr = std::make_shared(bn_relu, 2); - - auto f = make_shared( - NodeVector{output_relu, mean_rt, variance_rt, output_rt_bnr, mean_rt_bnr, variance_rt_bnr}, - op::ParameterVector{input, gamma, beta}); - auto backend = runtime::Backend::create("CPU"); - - // Create some tensors for input/output - auto input_t = backend->create_tensor(element::f32, Shape{1, 2, 2, 2}); - - copy_data(input_t, - vector{0.54881352f, - 0.71518934f, - 0.60276335f, - 0.54488319f, - 0.42365479f, - 0.64589411f, - 0.4375872f, - 0.89177299f}); - auto gamma_t = backend->create_tensor(element::f32, gamma_shape); - copy_data(gamma_t, vector{1.0f, 1.0f}); - auto beta_t = backend->create_tensor(element::f32, beta_shape); - copy_data(beta_t, vector{0.0f, 0.0f}); - auto bn_output = backend->create_tensor(element::f32, shape_r); - auto 
result_mean = backend->create_tensor(element::f32, mean_shape); - auto result_variance = backend->create_tensor(element::f32, var_shape); - - auto bn_output_bnr = backend->create_tensor(element::f32, shape_r); - auto result_mean_bnr = backend->create_tensor(element::f32, mean_shape); - auto result_variance_bnr = backend->create_tensor(element::f32, var_shape); - - backend->call_with_validate(f, - {bn_output, - result_mean, - result_variance, - bn_output_bnr, - result_mean_bnr, - result_variance_bnr}, - {input_t, gamma_t, beta_t}); - - EXPECT_TRUE(test::all_close(read_vector(bn_output), read_vector(bn_output_bnr))); - EXPECT_TRUE( - test::all_close(read_vector(result_mean), read_vector(result_mean_bnr))); - EXPECT_TRUE(test::all_close(read_vector(result_variance), - read_vector(result_variance_bnr))); -} - -TEST(cpu_fusion, fuse_conv_relu) -{ - auto A = std::make_shared(element::f32, Shape{2, 1, 2, 2}); - auto weights = std::make_shared(element::f32, Shape{1, 1, 2, 2}); - auto convolution = std::make_shared(A, weights, Strides{1, 1}, Strides{1, 1}); - auto relu = std::make_shared(convolution); - auto abs_node = - std::make_shared(std::make_shared(std::make_shared(relu))); - auto func = make_shared(abs_node, op::ParameterVector{A, weights}); - - pass::Manager pass_manager; - pass_manager.register_pass( - runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); - pass_manager.run_passes(func); - size_t cb = count_ops_of_type(func); - ASSERT_GT(cb, 0); -} - -TEST(cpu_fusion, conv_relu_n2c1h2w2_2) -{ - Shape shape_a{2, 1, 6, 6}; - Shape shape_weights{1, 1, 2, 2}; - - auto make_int_function = [shape_a, shape_weights]() { - auto A = std::make_shared(element::f32, shape_a); - auto weights = std::make_shared(element::f32, shape_weights); - auto conv = std::make_shared(A, weights, Strides{2, 2}, Strides{1, 1}); - auto relu = std::make_shared(conv); - auto f = make_shared(NodeVector{relu}, op::ParameterVector{A, weights}); - return f; - }; - - auto int_f = make_int_function(); - - 
auto make_cpu_function = [shape_a, shape_weights]() { - auto A = std::make_shared(element::f32, shape_a); - auto weights = std::make_shared(element::f32, shape_weights); - auto conv = std::make_shared(A, weights, Strides{2, 2}, Strides{1, 1}); - auto conv_relu = std::make_shared(conv); - auto f = make_shared(NodeVector{conv_relu}, op::ParameterVector{A, weights}); - return f; - }; - - auto cpu_f = make_cpu_function(); - - vector> args{ - {1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f, 7.25f, 8.25f, -1.25f, - -1.25f, 1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f, -4.25f, 2.25f, - 4.25f, 4.25f, 0.f, 0.f, -1.f, 0.f, 2.f, 2.f, 0.f, 0.f, 0.f, - 0.f, 2.f, 2.f, 1.25f, 2.25f, 5.25f, 6.25f, 1.25f, 1.25f, 3.25f, 4.25f, - -7.25f, 8.25f, 1.25f, -1.25f, -1.25f, 2.25f, 3.25f, 2.25f, -4.25f, -4.25f, -1.25f, - -2.25f, 4.25f, 2.25f, 4.25f, 4.25f, 0.f, 0.f, 1.f, 0.f, -2.f, 2.f, - 0.f, 0.f, 0.f, 0.f, -2.f, -2.f}, - {2., 2., 2., 2.}}; - - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); -} - -TEST(cpu_fusion, conv_bias_relu_n2c1h2w2_2) -{ - Shape shape_a{2, 1, 6, 6}; - Shape shape_weights{1, 1, 2, 2}; - Shape shape_bias{1}; - - auto make_int_function = [shape_a, shape_weights, shape_bias]() { - auto A = std::make_shared(element::f32, shape_a); - auto weights = std::make_shared(element::f32, shape_weights); - auto conv = std::make_shared(A, weights, Strides{2, 2}, Strides{1, 1}); - auto bias = std::make_shared(element::f32, shape_bias); - auto conv_bias = - conv + std::make_shared(bias, conv->get_shape(), AxisSet{0, 2, 3}); - auto relu = std::make_shared(conv_bias); - auto f = make_shared(NodeVector{relu}, op::ParameterVector{A, weights, bias}); - return f; - }; - - auto int_f = make_int_function(); - - auto make_cpu_function = [shape_a, shape_weights, shape_bias]() { - auto A = std::make_shared(element::f32, shape_a); - auto weights = 
std::make_shared(element::f32, shape_weights); - auto bias = std::make_shared(element::f32, shape_bias); - auto conv = std::make_shared(A, weights, Strides{2, 2}, Strides{1, 1}); - auto conv_bias_relu = std::make_shared(conv, bias, true); - auto f = make_shared(NodeVector{conv_bias_relu}, - op::ParameterVector{A, weights, bias}); - return f; - }; - - auto cpu_f = make_cpu_function(); - - vector> args{ - {1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f, 7.25f, 8.25f, -1.25f, - -1.25f, 1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f, -4.25f, 2.25f, - 4.25f, 4.25f, 0.f, 0.f, -1.f, 0.f, 2.f, 2.f, 0.f, 0.f, 0.f, - 0.f, 2.f, 2.f, 1.25f, 2.25f, 5.25f, 6.25f, 1.25f, 1.25f, 3.25f, 4.25f, - -7.25f, 8.25f, 1.25f, -1.25f, -1.25f, 2.25f, 3.25f, 2.25f, -4.25f, -4.25f, -1.25f, - -2.25f, 4.25f, 2.25f, 4.25f, 4.25f, 0.f, 0.f, 1.f, 0.f, -2.f, 2.f, - 0.f, 0.f, 0.f, 0.f, -2.f, -2.f}, - {2., 2., 2., 2.}, - {0.1f}}; - - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); -} - -TEST(cpu_fusion, conv_horizontal_fusion) -{ - Shape shape_a{2, 1, 6, 6}; - Shape shape_weights{1, 1, 2, 2}; - Shape shape_bias{1}; - - auto make_function = [shape_a, shape_weights, shape_bias]() { - auto A = std::make_shared(element::f32, shape_a); - auto weights1 = std::make_shared(element::f32, shape_weights); - auto conv1 = std::make_shared(A, weights1, Strides{2, 2}, Strides{1, 1}); - auto bias1 = std::make_shared(element::f32, shape_bias); - auto conv_bias1 = - conv1 + std::make_shared(bias1, conv1->get_shape(), AxisSet{0, 2, 3}); - auto relu1 = std::make_shared(conv_bias1); - - auto weights2 = std::make_shared(element::f32, shape_weights); - auto conv2 = std::make_shared(A, weights2, Strides{2, 2}, Strides{1, 1}); - auto bias2 = std::make_shared(element::f32, shape_bias); - auto conv_bias2 = - conv2 + std::make_shared(bias2, conv2->get_shape(), AxisSet{0, 2, 3}); - 
auto relu2 = std::make_shared(conv_bias2); - - auto concat = std::make_shared(NodeVector{relu1, relu2}, 1); - auto f = make_shared(NodeVector{concat}, - op::ParameterVector{A, weights1, bias1, weights2, bias2}); - return f; - }; - auto int_f = make_function(); - auto cpu_f = make_function(); - - vector> args{ - {1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f, 7.25f, 8.25f, -1.25f, - -1.25f, 1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f, -4.25f, 2.25f, - 4.25f, 4.25f, 0.f, 0.f, -1.f, 0.f, 2.f, 2.f, 0.f, 0.f, 0.f, - 0.f, 2.f, 2.f, 1.25f, 2.25f, 5.25f, 6.25f, 1.25f, 1.25f, 3.25f, 4.25f, - -7.25f, 8.25f, 1.25f, -1.25f, -1.25f, 2.25f, 3.25f, 2.25f, -4.25f, -4.25f, -1.25f, - -2.25f, 4.25f, 2.25f, 4.25f, 4.25f, 0.f, 0.f, 1.f, 0.f, -2.f, 2.f, - 0.f, 0.f, 0.f, 0.f, -2.f, -2.f}, - {2., 2., 2., 2.}, - {0.1f}, - {3., 3., 3., 3.}, - {0.2f}}; - - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); - - size_t cpu_cb = count_ops_of_type(cpu_f); - ASSERT_EQ(cpu_cb, 1); -} - -// ConvolutionBiasAdd relies on an in-place fused MKLDNN kernel. -// Need to ensure that it is fused only when in-place buffer allocation is feasible -shared_ptr gen_conv_bias_add(bool param_input, bool result_output) -{ - auto A = make_shared(element::f32, Shape{2, 1, 2, 2}); - auto weights = make_shared(element::f32, Shape{1, 1, 1, 1}); - auto bias = make_shared(element::f32, Shape{1}); - auto conv = make_shared(A, weights, Strides{1, 1}, Strides{1, 1}); - auto bias_broadcast = make_shared(bias, conv->get_shape(), AxisSet{0, 2, 3}); - auto convbias = conv + bias_broadcast; - auto B = make_shared(element::f32, Shape{2, 1, 2, 2}); - auto abs_B = make_shared(B); - auto add = - param_input ? make_shared(convbias, B) : make_shared(convbias, abs_B); - auto abs = make_shared(add); - - return result_output ? 
make_shared(add, op::ParameterVector{A, weights, bias, B}) - : make_shared(abs, op::ParameterVector{A, weights, bias, B}); -} - -TEST(cpu_fusion, fuse_conv_bias_add) -{ - auto func_fuse = gen_conv_bias_add(false, false); - auto func_nofuse1 = gen_conv_bias_add(true, false); - auto func_nofuse2 = gen_conv_bias_add(false, true); - - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.run_passes(func_fuse); - ASSERT_EQ(count_ops_of_type(func_fuse), 1); - - pass_manager.run_passes(func_nofuse1); - ASSERT_EQ(count_ops_of_type(func_nofuse1), 0); - - pass_manager.run_passes(func_nofuse2); - ASSERT_EQ(count_ops_of_type(func_nofuse2), 1); -} - -TEST(cpu_fusion, conv_bias_add) -{ - auto int_f = gen_conv_bias_add(false, false); - auto cpu_f = gen_conv_bias_add(false, false); - - vector> args{{1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f}, - {-1.25f}, - {2.25f}, - {1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f}}; - - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); -} - -// ConvolutionAdd relies on an in-place fused MKLDNN kernel. -// Need to ensure that it is fused only when in-place buffer allocation is feasible -shared_ptr gen_conv_add(bool param_input, bool result_output) -{ - auto A = make_shared(element::f32, Shape{2, 1, 2, 2}); - auto weights = make_shared(element::f32, Shape{1, 1, 1, 1}); - auto conv = make_shared(A, weights, Strides{1, 1}, Strides{1, 1}); - auto B = make_shared(element::f32, Shape{2, 1, 2, 2}); - auto abs_B = make_shared(B); - auto add = param_input ? make_shared(conv, B) : make_shared(conv, abs_B); - auto abs = make_shared(add); - - return result_output ? 
make_shared(add, op::ParameterVector{A, weights, B}) - : make_shared(abs, op::ParameterVector{A, weights, B}); -} - -TEST(cpu_fusion, fuse_conv_add) -{ - auto func_fuse = gen_conv_add(false, false); - auto func_nofuse1 = gen_conv_add(true, false); - auto func_nofuse2 = gen_conv_add(false, true); - - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.run_passes(func_fuse); - ASSERT_EQ(count_ops_of_type(func_fuse), 1); - - pass_manager.run_passes(func_nofuse1); - ASSERT_EQ(count_ops_of_type(func_nofuse1), 0); - - pass_manager.run_passes(func_nofuse2); - ASSERT_EQ(count_ops_of_type(func_nofuse2), 1); -} - -TEST(cpu_fusion, conv_add) -{ - auto int_f = gen_conv_add(false, false); - auto cpu_f = gen_conv_add(false, false); - - vector> args{{1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f}, - {-1.25f}, - {1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f}}; - - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); - - int_f = gen_conv_add(false, true); - cpu_f = gen_conv_add(false, true); - - int_results = execute(int_f, args, "INTERPRETER"); - cpu_results = execute(cpu_f, args, "CPU"); - EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); -} - -shared_ptr gen_groupconv_batchnorm(const bool add_goe, - const bool with_relu, - const Shape shape_in, - const Shape shape_weights, - const Shape shape_out, - const size_t groups) -{ - auto input = make_shared(element::f32, shape_in); - auto weights = make_shared(element::f32, shape_weights); - - unsigned long OC = shape_out.at(1); - Shape shape_bn{OC}; - auto group_conv = make_shared(input, - weights, - Strides{1, 1}, - Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, - Strides{1, 1}, - groups, - shape_out); - - double eps = 0.001; - auto gamma = std::make_shared(element::f32, shape_bn); - auto beta = std::make_shared(element::f32, 
shape_bn); - auto mean = std::make_shared(element::f32, shape_bn); - auto var = std::make_shared(element::f32, shape_bn); - - auto goe_bn = std::make_shared(group_conv, 0); - - // Adding a goe will stop fusion since the patterns wont expect to see this op - auto bn = - add_goe ? std::make_shared(goe_bn, gamma, beta, mean, var, eps) - : std::make_shared(group_conv, gamma, beta, mean, var, eps); - if (with_relu) - { - auto prelu = std::make_shared(bn); - auto f = make_shared(NodeVector{prelu}, - op::ParameterVector{input, weights, gamma, beta, mean, var}); - return f; - } - else - { - auto f = make_shared(NodeVector{bn}, - op::ParameterVector{input, weights, gamma, beta, mean, var}); - return f; - } -} - -void fuse_groupconv_batchnorm_helper(Shape shape_in, - Shape shape_weights, - Shape shape_r, - size_t groups) -{ - auto func_fuse = - gen_groupconv_batchnorm(false, false, shape_in, shape_weights, shape_r, groups); - auto func_fuse2 = - gen_groupconv_batchnorm(false, true, shape_in, shape_weights, shape_r, groups); - - { - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.run_passes(func_fuse); - ASSERT_EQ(count_ops_of_type(func_fuse), 1); - } - - { - // test groupconv + batchnorm + relu fusion - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.run_passes(func_fuse2); - ASSERT_EQ(count_ops_of_type(func_fuse2), 1); - ASSERT_EQ(count_ops_of_type(func_fuse2), 0); - } -} - -void groupconv_batchnorm_test_val_helper( - const bool with_relu, Shape shape_in, Shape shape_weights, Shape shape_r, size_t groups) -{ - shared_ptr fuse_func = - gen_groupconv_batchnorm(false, with_relu, shape_in, shape_weights, shape_r, groups); - shared_ptr nofuse_func = - gen_groupconv_batchnorm(true, with_relu, shape_in, shape_weights, shape_r, groups); - - test::Uniform rng(1.0f, 100.0f); - vector> args; - for (shared_ptr param : fuse_func->get_parameters()) - { - vector tensor_val(shape_size(param->get_shape())); - rng.initialize(tensor_val); 
- args.push_back(tensor_val); - } - - auto fuse_results = execute(fuse_func, args, "CPU"); - auto nofuse_results = execute(nofuse_func, args, "CPU"); - - EXPECT_TRUE(test::all_close(fuse_results.at(0), nofuse_results.at(0))); -} - -TEST(cpu_fusion, fuse_groupconv_batchnorm1) -{ - Shape shape_in{1, 20, 5, 5}; - Shape shape_weights{8, 10, 3, 3}; - Shape shape_r{1, 8, 3, 3}; - fuse_groupconv_batchnorm_helper(shape_in, shape_weights, shape_r, 2); - groupconv_batchnorm_test_val_helper(false, shape_in, shape_weights, shape_r, 2); - groupconv_batchnorm_test_val_helper(true, shape_in, shape_weights, shape_r, 2); -} - -TEST(cpu_fusion, fuse_groupconv_batchnorm2) -{ - Shape shape_in{1, 20, 5, 5}; - Shape shape_weights{5, 4, 3, 3}; - Shape shape_r{1, 5, 3, 3}; - fuse_groupconv_batchnorm_helper(shape_in, shape_weights, shape_r, 5); - groupconv_batchnorm_test_val_helper(false, shape_in, shape_weights, shape_r, 5); - groupconv_batchnorm_test_val_helper(true, shape_in, shape_weights, shape_r, 5); -} - -TEST(cpu_fusion, fuse_groupconv_batchnorm3) -{ - Shape shape_in{1, 20, 5, 5}; - Shape shape_weights{20, 1, 3, 3}; - Shape shape_r{1, 20, 3, 3}; - fuse_groupconv_batchnorm_helper(shape_in, shape_weights, shape_r, 20); - groupconv_batchnorm_test_val_helper(false, shape_in, shape_weights, shape_r, 20); - groupconv_batchnorm_test_val_helper(true, shape_in, shape_weights, shape_r, 20); -} - -TEST(cpu_fusion, fuse_groupconv_batchnorm4) -{ - Shape shape_in{1, 20, 4, 4}; - Shape shape_weights{5, 20, 1, 1}; - Shape shape_r{1, 5, 4, 4}; - fuse_groupconv_batchnorm_helper(shape_in, shape_weights, shape_r, 1); - groupconv_batchnorm_test_val_helper(false, shape_in, shape_weights, shape_r, 1); - groupconv_batchnorm_test_val_helper(true, shape_in, shape_weights, shape_r, 1); -} - -std::vector> rnn_matrix_fusion_eval(const size_t time_steps, - const Shape& data_shape, - const Shape& weights_shape, - const Shape& bias_shape, - const vector& data_val, - const vector& weights_val, - const vector& 
bias_val, - const bool enable_pass) -{ - auto data = make_shared(element::f32, data_shape); - auto weights = make_shared(element::f32, weights_shape); - auto bias = make_shared(element::f32, bias_shape); - - // results from each time step - NodeVector results; - for (size_t t = 0; t < time_steps; ++t) - { - auto data_slice = make_shared( - data, Coordinate{0, t, 0}, Coordinate{data_shape[0], t + 1, data_shape[2]}); - auto data_reshape = make_shared( - data_slice, AxisVector{0, 1, 2}, Shape{data_shape[0], data_shape[2]}); - auto weights_reshape = make_shared( - weights, AxisVector{1, 0}, Shape{weights_shape[1], weights_shape[0]}); - auto dot = make_shared(data_reshape, weights_reshape); - auto bias_broadcast = make_shared(bias, dot->get_shape(), AxisSet{0}); - auto add = make_shared(dot, bias_broadcast); - results.push_back(add); - } - auto func = make_shared(results, op::ParameterVector{data, weights, bias}); - if (enable_pass) - { - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.register_pass( - runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); - pass_manager.run_passes(func); - // check all of our dot/add are converted to a single MatmulBias op. 
- size_t count = count_ops_of_type(func); - EXPECT_EQ(count, 1); - } - - auto backend = runtime::Backend::create("CPU"); - - shared_ptr data_tensor = - backend->create_tensor(element::f32, data->get_shape()); - shared_ptr weights_tensor = - backend->create_tensor(element::f32, weights->get_shape()); - shared_ptr bias_tensor = - backend->create_tensor(element::f32, bias->get_shape()); - - std::vector> result_tensors; - for (auto r : results) - { - result_tensors.push_back(backend->create_tensor(element::f32, r->get_shape())); - } - - copy_data(data_tensor, data_val); - copy_data(weights_tensor, weights_val); - copy_data(bias_tensor, bias_val); - backend->call_with_validate(func, result_tensors, {data_tensor, weights_tensor, bias_tensor}); - return result_tensors; -} - -TEST(cpu_fusion, rnn_matrix_fusion_eval_pass) -{ - const size_t time_steps = 4; - Shape data_shape{3, time_steps, 5}; - Shape weights_shape{6, data_shape[2]}; - Shape bias_shape{6}; - - test::Uniform rng{0, 1, 0}; - vector data_val(shape_size(data_shape)); - vector weights_val(shape_size(weights_shape)); - vector bias_val(shape_size(bias_shape)); - rng.initialize(data_val); - rng.initialize(weights_val); - rng.initialize(bias_val); - - std::vector> result_expected = rnn_matrix_fusion_eval( - time_steps, data_shape, weights_shape, bias_shape, data_val, weights_val, bias_val, false); - std::vector> result_fused = rnn_matrix_fusion_eval( - time_steps, data_shape, weights_shape, bias_shape, data_val, weights_val, bias_val, true); - for (size_t i = 0; i < result_expected.size(); ++i) - { - EXPECT_TRUE(test::all_close(result_expected[i], result_fused[i])); - } -} - -TEST(cpu_fusion, rnn_fusion_from_json_model) -{ - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.register_pass( - runtime::cpu::pass::CPUFusion::REGULAR_FUSIONS); - const string json_path = - file_util::path_join(SERIALIZED_ZOO, "mxnet/rnn-10-step-fusion-test.json"); - const string json_string = 
file_util::read_file_to_string(json_path); - stringstream ss(json_string); - shared_ptr func = ngraph::deserialize(ss); - pass_manager.run_passes(func); - const size_t NUM_STEPS = 10; - auto mmb_predicate = [](std::shared_ptr node) { - auto users = node->get_users(); - return users.size() == NUM_STEPS && - std::all_of(begin(users), end(users), [](std::shared_ptr n) { - return std::dynamic_pointer_cast(n) != nullptr; - }); - }; - - auto mmbs = get_ops_of_type(func); - ASSERT_TRUE(std::any_of(begin(mmbs), end(mmbs), mmb_predicate)); -} - -TEST(cpu_fusion, weight_fusion) -{ - auto param = std::make_shared(element::f32, Shape{64}); - auto reshape_conv = - std::make_shared(param, AxisVector{0}, Shape{16, 4, 1, 1}); - auto data_conv = std::make_shared(element::f32, Shape{16, 4, 7, 7}); - auto tvt = reshape_conv->get_outputs().at(0).get_tensor_ptr().get(); - auto lt_desc = std::make_shared(*tvt); - auto cvt_lt_conv = std::make_shared(reshape_conv, lt_desc); - auto conv = std::make_shared( - data_conv, cvt_lt_conv, Strides{1, 1}, Strides{1, 1}); - - auto reshape_conv_bprop = - std::make_shared(param, AxisVector{0}, Shape{16, 4, 1, 1}); - auto dummy_arg_conv_bprop = std::make_shared(element::f32, Shape{1, 16, 7, 7}); - auto tvt_bprop = reshape_conv_bprop->get_outputs().at(0).get_tensor_ptr().get(); - auto lt_desc_bprop = std::make_shared(*tvt_bprop); - auto cvt_lt_conv_bprop = - std::make_shared(reshape_conv_bprop, lt_desc_bprop); - auto conv_bprop = std::make_shared(Shape{1, 4, 7, 7}, - cvt_lt_conv_bprop, - dummy_arg_conv_bprop, - Strides{1, 1}, - Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, - Strides{1, 1}); - - auto conv_relu = std::make_shared(conv); - auto conv_bprop_abs = std::make_shared(conv_bprop); - - auto f = make_shared(NodeVector{conv_relu, conv_bprop_abs}, - op::ParameterVector{param, data_conv, dummy_arg_conv_bprop}); - - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.run_passes(f); - - auto new_conv_bprop_data = 
conv_bprop_abs->get_argument(0); - auto new_convert_layout = new_conv_bprop_data->get_argument(0); - - ASSERT_EQ(std::dynamic_pointer_cast( - new_convert_layout->get_argument(0)), - cvt_lt_conv); -} - -TEST(cpu_fusion, max_pool_with_indices) -{ - Shape shape_a{10, 3, 28, 28}; - auto input = std::make_shared(element::f32, shape_a); - Shape window_shape{2, 2}; - auto max_pool = std::make_shared(input, window_shape); - auto C = std::make_shared(element::f32, max_pool->get_shape()); - - ngraph::autodiff::Adjoints adjoints(NodeVector{max_pool}, NodeVector{C}); - - auto dinput = adjoints.backprop_node(input); - - auto df = std::make_shared(NodeVector{dinput}, op::ParameterVector{input, C}); - - auto f = std::make_shared(NodeVector{max_pool}, op::ParameterVector{input}); - - { - pass::Manager pass_manager; - pass_manager.register_pass("max_pool_fprop_before.pdf"); - pass_manager.run_passes(f); - } - - { - NodeVector nv_cwi; - pass::Manager pass_manager; - pass_manager.register_pass("max_pool_bprop_before.pdf"); - pass_manager.register_pass(nv_cwi); - pass_manager.register_pass("max_pool_bprop_after.pdf"); - pass_manager.run_passes(df); - } - - { - pass::Manager pass_manager; - pass_manager.register_pass("max_pool_fprop_after.pdf"); - pass_manager.run_passes(f); - } - - auto maxpool_goe_output = - std::dynamic_pointer_cast(f->get_results().at(0)->get_argument(0)); - ASSERT_TRUE(maxpool_goe_output); - ASSERT_EQ(maxpool_goe_output->get_n(), 0); - auto maxpool_with_indices = df->get_results().at(0)->get_argument(0); - auto maxpool_goe_indices = - std::dynamic_pointer_cast(maxpool_with_indices->get_argument(2)); - ASSERT_TRUE(maxpool_goe_indices); - ASSERT_EQ(maxpool_goe_indices->get_n(), 1); -} - -TEST(cpu_fusion, backwards_maxpool_with_indices_n4_c1_hw4_2x2_max) -{ - Shape shape_a{1, 4, 4, 4}; - Shape maxpool_shape{1, 4, 3, 3}; - auto A = std::make_shared(element::f32, shape_a); - Shape window_shape{2, 2}; - auto window_movement_strides = Strides{1, 1}; - auto maxpool = 
std::make_shared(A, window_shape, window_movement_strides); - auto f = std::make_shared(maxpool, op::ParameterVector{A}); - - auto backend = runtime::Backend::create("CPU"); - shared_ptr ep = backend->create_tensor(element::f32, maxpool_shape); - vector dataEp(shape_size(maxpool_shape), 4); - - shared_ptr input = backend->create_tensor(element::f32, shape_a); - shared_ptr output = backend->create_tensor(element::f32, shape_a); - - vector dataInput{11.f, 31.f, 40.f, 47.f, 13.f, 61.f, 48.f, 59.f, 17.f, 39.f, 64.f, - 62.f, 45.f, 55.f, 36.f, 19.f, 65.f, 33.f, 49.f, 30.f, 56.f, 41.f, - 53.f, 58.f, 22.f, 35.f, 52.f, 50.f, 63.f, 54.f, 12.f, 26.f, 44.f, - 21.f, 69.f, 24.f, 46.f, 25.f, 51.f, 29.f, 72.f, 15.f, 73.f, 10.f, - 16.f, 37.f, 70.f, 32.f, 28.f, 66.f, 57.f, 27.f, 60.f, 42.f, 43.f, - 71.f, 18.f, 38.f, 67.f, 68.f, 14.f, 20.f, 34.f, 23.f}; - - vector expected{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 12.0f, 0.0f, 4.0f, 0.0f, 0.0f, 16.0f, - 0.0f, 0.0f, 4.0f, 0.0f, 0.0f, 4.0f, 0.0f, 0.0f, 0.0f, 4.0f, 0.0f, - 8.0f, 8.0f, 0.0f, 0.0f, 4.0f, 0.0f, 4.0f, 4.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 8.0f, 0.0f, 4.0f, 0.0f, 0.0f, 0.0f, 8.0f, 0.0f, 16.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 8.0f, 0.0f, 0.0f, 4.0f, 0.0f, 0.0f, - 8.0f, 0.0f, 4.0f, 8.0f, 4.0f, 0.0f, 0.0f, 0.0f, 0.0f}; - - copy_data(ep, dataEp); - copy_data(input, dataInput); - - auto C = std::make_shared(element::f32, maxpool_shape); - auto df = autodiff::backprop_function(f); - - { - NodeVector nv_cwi; - pass::Manager pass_manager; - pass_manager.register_pass("max_pool_bprop_before2.pdf"); - pass_manager.register_pass(nv_cwi); - pass_manager.register_pass("max_pool_bprop_after2.pdf"); - pass_manager.run_passes(df); - } - - backend->call_with_validate(df, {output}, {input, ep}); - ASSERT_TRUE(read_vector(output) == expected); -} - -#if 0 -TEST(cpu_fusion, loop_kernel_one_input_one_output) -{ - Shape shapeA{2, 2}; - auto A = make_shared(element::i32, shapeA); - auto neg_a = make_shared(A); - auto lk = make_shared( - NodeVector{neg_a}, 
NodeVector{neg_a}, NodeVector{A}); - auto f = make_shared(NodeVector{lk}, op::ParameterVector{A}); - - auto backend = runtime::Backend::create("CPU"); - shared_ptr a = backend->create_tensor(element::i32, shapeA); - shared_ptr result = backend->create_tensor(element::i32, shapeA); - - vector dataA{1, 4, 1, 4}; - copy_data(a, dataA); - vector expected{-1, -4, -1, -4}; - - backend->call_with_validate(f, {result}, {a}); - - EXPECT_EQ(read_vector(result), expected); -} - -TEST(cpu_fusion, loop_kernel_embedded_graph) -{ - Shape shapeA{2, 2}; - auto A = make_shared(element::i32, shapeA); - auto B = make_shared(element::i32, shapeA); - auto neg_a = make_shared(A); - auto neg_b = make_shared(B); - auto add = neg_a + neg_b; - auto lk = make_shared( - NodeVector{add}, NodeVector{add}, NodeVector{neg_a, neg_b}); - auto f = make_shared(NodeVector{lk}, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("CPU"); - shared_ptr a = backend->create_tensor(element::i32, shapeA); - shared_ptr b = backend->create_tensor(element::i32, shapeA); - shared_ptr result = backend->create_tensor(element::i32, shapeA); - - vector dataA{1, 4, 1, 4}; - copy_data(a, dataA); - vector dataB{1, 2, 3, 4}; - copy_data(b, dataB); - vector expected{-2, -6, -4, -8}; - backend->call_with_validate(f, {result}, {a, b}); - EXPECT_EQ(read_vector(result), expected); -} - -TEST(cpu_fusion, loop_kernel_two_inputs_one_output) -{ - Shape shapeA{2, 2}; - auto A = make_shared(element::i32, shapeA); - auto B = make_shared(element::i32, shapeA); - auto add = A + B; - auto lk = make_shared( - NodeVector{add}, NodeVector{add}, NodeVector{A, B}); - auto f = make_shared(NodeVector{lk}, op::ParameterVector{A, B}); - - auto backend = runtime::Backend::create("CPU"); - shared_ptr a = backend->create_tensor(element::i32, shapeA); - shared_ptr b = backend->create_tensor(element::i32, shapeA); - shared_ptr result = backend->create_tensor(element::i32, shapeA); - - vector dataA{1, 4, 1, 4}; - copy_data(a, 
dataA); - vector dataB{1, 2, 3, 4}; - copy_data(b, dataB); - vector expected{2, 6, 4, 8}; - - backend->call_with_validate(f, {result}, {a, b}); - - EXPECT_EQ(read_vector(result), expected); -} - -TEST(cpu_fusion, loop_kernel_multiple_outputs) -{ - Shape shapeA{2, 2}; - auto A = make_shared(element::i32, shapeA); - auto B = make_shared(element::i32, shapeA); - auto C = make_shared(element::i32, shapeA); - auto D = make_shared(element::i32, shapeA); - - auto neg_a = make_shared(A); - auto neg_b = make_shared(B); - auto add_ab = neg_a + neg_b; - auto add_cd = C + B; - auto add_cd_abs = make_shared(add_cd); - auto add_ab_abs = make_shared(add_ab); - auto add_aab = add_ab_abs + A; - auto add_cdd = add_cd_abs + D; - - auto lk = make_shared( - NodeVector{neg_a, neg_b, add_ab, add_cd, add_cd_abs, add_ab_abs, add_aab, add_cdd}, - NodeVector{add_aab, add_cdd, neg_b}, - NodeVector{A, B, C, D}); - auto add_aab_goe = std::make_shared(lk, 0); - auto add_cdd_goe = std::make_shared(lk, 1); - auto neg_b_goe = std::make_shared(lk, 2); - - auto f = make_shared(NodeVector{add_aab_goe, add_cdd_goe, neg_b_goe}, - op::ParameterVector{A, B, C, D}); - - auto backend = runtime::Backend::create("CPU"); - - shared_ptr a = backend->create_tensor(element::i32, shapeA); - shared_ptr b = backend->create_tensor(element::i32, shapeA); - shared_ptr c = backend->create_tensor(element::i32, shapeA); - shared_ptr d = backend->create_tensor(element::i32, shapeA); - shared_ptr r1 = backend->create_tensor(element::i32, shapeA); - shared_ptr r2 = backend->create_tensor(element::i32, shapeA); - shared_ptr r3 = backend->create_tensor(element::i32, shapeA); - - vector dataA{1, 4, 1, 4}; - vector dataB{3, 3, 3, 9}; - vector dataC{1, 2, 3, 4}; - vector dataD{-2, 2, -1, 1}; - copy_data(a, dataA); - copy_data(b, dataB); - copy_data(c, dataC); - copy_data(d, dataD); - - backend->call_with_validate(f, {r1, r2, r3}, {a, b, c, d}); - - vector expected1{5, 11, 5, 17}; - vector expected2{2, 7, 5, 14}; - vector 
expected3{-3, -3, -3, -9}; - EXPECT_EQ(read_vector(r1), expected1); - EXPECT_EQ(read_vector(r2), expected2); - EXPECT_EQ(read_vector(r3), expected3); -} - -TEST(cpu_fusion, loop_kernel_copy_with_new_args) -{ - Shape shapeA{2, 2}; - auto A = make_shared(element::i32, shapeA); - auto B = make_shared(element::i32, shapeA); - auto C = make_shared(element::i32, shapeA); - auto D = make_shared(element::i32, shapeA); - - auto neg_a = make_shared(A); - auto neg_b = make_shared(B); - auto add_ab = neg_a + neg_b; - auto add_cd = C + B; - auto add_cd_abs = make_shared(add_cd); - auto add_ab_abs = make_shared(add_ab); - auto add_aab = add_ab_abs + A; - auto add_cdd = add_cd_abs + D; - - auto lk = make_shared( - NodeVector{neg_a, neg_b, add_ab, add_cd, add_cd_abs, add_ab_abs, add_aab, add_cdd}, - NodeVector{add_aab, add_cdd, neg_b}, - NodeVector{A, B, C, D}); - auto add_aab_goe = std::make_shared(lk, 0); - auto add_cdd_goe = std::make_shared(lk, 1); - auto neg_b_goe = std::make_shared(lk, 2); - - auto f = make_shared(NodeVector{add_aab_goe, add_cdd_goe, neg_b_goe}, - op::ParameterVector{A, B, C, D}); - - auto copy_f = clone_function(*f); - - auto backend = runtime::Backend::create("CPU"); - - shared_ptr a = backend->create_tensor(element::i32, shapeA); - shared_ptr b = backend->create_tensor(element::i32, shapeA); - shared_ptr c = backend->create_tensor(element::i32, shapeA); - shared_ptr d = backend->create_tensor(element::i32, shapeA); - shared_ptr r1 = backend->create_tensor(element::i32, shapeA); - shared_ptr r2 = backend->create_tensor(element::i32, shapeA); - shared_ptr r3 = backend->create_tensor(element::i32, shapeA); - shared_ptr copy_r1 = backend->create_tensor(element::i32, shapeA); - shared_ptr copy_r2 = backend->create_tensor(element::i32, shapeA); - shared_ptr copy_r3 = backend->create_tensor(element::i32, shapeA); - - vector dataA{1, 4, 1, 4}; - vector dataB{3, 3, 3, 9}; - vector dataC{1, 2, 3, 4}; - vector dataD{-2, 2, -1, 1}; - copy_data(a, dataA); - 
copy_data(b, dataB); - copy_data(c, dataC); - copy_data(d, dataD); - - backend->call_with_validate(f, {r1, r2, r3}, {a, b, c, d}); - backend->call_with_validate(copy_f, {copy_r1, copy_r2, copy_r3}, {a, b, c, d}); - - EXPECT_EQ(read_vector(r1), read_vector(copy_r1)); - EXPECT_EQ(read_vector(r2), read_vector(copy_r2)); - EXPECT_EQ(read_vector(r3), read_vector(copy_r3)); -} - -#endif - -static std::shared_ptr make_forward_function() -{ - Shape shape_a{10, 3, 28, 28}; - auto input = std::make_shared(element::f32, shape_a); - Shape window_shape{2, 2}; - auto max_pool = std::make_shared(input, window_shape); - auto neg = std::make_shared(max_pool); - auto absn = std::make_shared(max_pool); - return std::make_shared(NodeVector{max_pool, neg, absn}, op::ParameterVector{input}); -} - -static std::pair, std::vector>> - make_backward_function(std::shared_ptr f) -{ - // get parameters - std::vector> back_parameters = f->get_parameters(); - - ngraph::NodeVector adjoints; - ngraph::NodeVector outputs; - for (auto Y : f->get_results()) - { - // Get the output - // Create the Adjoint - auto C = std::make_shared(Y->get_element_type(), Y->get_shape()); - outputs.push_back(Y); - adjoints.push_back(C); - } - - ngraph::autodiff::Adjoints adjoint{outputs, adjoints}; - - // Perform autodiff - std::vector> dYdXs(back_parameters.size()); - transform(back_parameters.begin(), - back_parameters.end(), - dYdXs.begin(), - [&adjoint](const std::shared_ptr& X) { return adjoint.backprop_node(X); }); - - // create the backward function - std::vector> param_adjoints; - for (auto n : adjoints) - param_adjoints.push_back(std::dynamic_pointer_cast(n)); - back_parameters.insert(back_parameters.begin(), param_adjoints.begin(), param_adjoints.end()); - - return {std::make_shared(dYdXs, back_parameters), adjoints}; -} - -void optimize_graph(std::shared_ptr& f, std::shared_ptr bf) -{ - // start by removing excess reshapes - NodeVector nv_cwi; - ngraph::pass::Manager pass_manager; - 
pass_manager.register_pass(); - pass_manager.register_pass(); - pass_manager.register_pass(nv_cwi); - pass_manager.register_pass("before.fprop_cache.pdf"); - - pass_manager.run_passes(f); - pass_manager.run_passes(bf); - if (nv_cwi.size() > 0) - { - NodeVector new_outputs; - for (auto r : f->get_results()) - { - new_outputs.push_back(r->get_argument(0)); - } - - new_outputs.insert(new_outputs.end(), nv_cwi.begin(), nv_cwi.end()); - f = std::make_shared(new_outputs, f->get_parameters()); - } - - ngraph::NodeVector dYdXs; - for (size_t i = 0; i < bf->get_output_size(); ++i) - { - dYdXs.push_back(bf->get_output_op(i)->get_argument(0)); - } - - ngraph::NodeVector combined_outputs; - for (auto r : f->get_results()) - { - combined_outputs.push_back(r->get_argument(0)); - } - - combined_outputs.insert(combined_outputs.end(), dYdXs.begin(), dYdXs.end()); - - std::vector> combined_parameters = f->get_parameters(); - std::vector> back_parameters = bf->get_parameters(); - - combined_parameters.insert( - combined_parameters.end(), back_parameters.begin(), back_parameters.end()); - auto combinedf = std::make_shared(combined_outputs, combined_parameters); - // rerun Reshape elimination to help simplify the graph again, run CPUFusion - // this replaces nodes in both f and bf due to shared-ptr - ness - ngraph::pass::Manager pass_manager_comb; - pass_manager_comb.register_pass(); - pass_manager_comb.register_pass(); - pass_manager_comb.run_passes(combinedf); -} - -TEST(cpu_fusion, maxpool_with_indices_in_mxnet) -{ - auto f = make_forward_function(); - auto bfa = make_backward_function(f); - auto maybe_bf = bfa.first; - auto adjoints = bfa.second; - optimize_graph(f, maybe_bf); - auto fprop_cache = ngraph::cache_fprop(f, maybe_bf); - - auto mpwi_bprop = fprop_cache.bprop->get_results().at(0)->get_argument(0); - ASSERT_TRUE(std::dynamic_pointer_cast(mpwi_bprop->get_argument(0))); - ASSERT_TRUE(std::dynamic_pointer_cast(mpwi_bprop->get_argument(2))); -} - -TEST(cpu_fusion, 
conv_batch_norm_folding) -{ - Shape shape_input{1, 8, 3, 3}; - Shape shape_weights{2, 8, 1, 1}; - Shape shape_norm{2}; - - auto make_function = [shape_input, shape_weights, shape_norm]() { - auto input = std::make_shared(element::f32, shape_input); - auto weights = std::make_shared(element::f32, shape_weights); - double eps = 0.001; - auto gamma = std::make_shared(element::f32, shape_norm); - auto beta = std::make_shared(element::f32, shape_norm); - auto mean = std::make_shared(element::f32, shape_norm); - auto var = std::make_shared(element::f32, shape_norm); - auto conv = std::make_shared(input, weights, Strides{1, 1}, Strides{1, 1}); - auto bn = std::make_shared(conv, gamma, beta, mean, var, eps); - auto f = make_shared(NodeVector{bn}, - op::ParameterVector{input, weights, gamma, beta, mean, var}); - return f; - }; - - auto int_f = make_function(); - auto cpu_f = make_function(); - - vector> args{ - {1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f, 7.25f, 8.25f, -1.25f, - -1.25f, 1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f, -4.25f, 2.25f, - 4.25f, 4.25f, 0.f, 0.f, -1.f, 0.f, 2.f, 2.f, 0.f, 0.f, 0.f, - 0.f, 2.f, 2.f, 1.25f, 2.25f, 5.25f, 6.25f, 1.25f, 1.25f, 3.25f, 4.25f, - -7.25f, 8.25f, 1.25f, -1.25f, -1.25f, 2.25f, 3.25f, 2.25f, -4.25f, -4.25f, -1.25f, - -2.25f, 4.25f, 2.25f, 4.25f, 4.25f, 0.f, 0.f, 1.f, 0.f, -2.f, 2.f, - 0.f, 0.f, 0.f, 0.f, -2.f, -2.f}, - {1.25f, - 2.25f, - 5.25f, - 6.25f, - -1.25f, - -1.25f, - 3.25f, - -4.25f, - 7.25f, - 8.25f, - -1.25f, - 0.f, - 0.f, - 0.f, - 0.f, - -2.f}, - {-0.9384f, 0.01875f}, - {11.0f, 1.3f}, - {0.12f, 0.31f}, - {0.01f, 0.11f}, - }; - - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); -} - -TEST(cpu_fusion, convbias_batch_norm_folding) -{ - Shape shape_input{2, 8, 5, 5}; - Shape shape_weights{2, 8, 2, 2}; - Shape shape_norm{2}; - - auto make_function = [shape_input, 
shape_weights, shape_norm]() { - auto input = std::make_shared(element::f32, shape_input); - auto weights = std::make_shared(element::f32, shape_weights); - auto bias = std::make_shared(element::f32, Shape{2}); - double eps = 1.01; - auto gamma = std::make_shared(element::f32, shape_norm); - auto beta = std::make_shared(element::f32, shape_norm); - auto mean = std::make_shared(element::f32, shape_norm); - auto var = std::make_shared(element::f32, shape_norm); - auto conv = std::make_shared(input, weights, Strides{1, 1}, Strides{1, 1}); - auto convbias = - conv + std::make_shared(bias, conv->get_shape(), AxisSet{0, 2, 3}); - auto bn = std::make_shared(convbias, gamma, beta, mean, var, eps); - auto f = make_shared( - NodeVector{bn}, op::ParameterVector{input, weights, bias, gamma, beta, mean, var}); - return f; - }; - - auto int_f = make_function(); - auto cpu_f = make_function(); - - test::Uniform rng(1.0f, 100.0f); - vector> args; - for (shared_ptr param : cpu_f->get_parameters()) - { - vector tensor_val(shape_size(param->get_shape())); - rng.initialize(tensor_val); - args.push_back(tensor_val); - } - - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); -} - -TEST(cpu_fusion, conv_affine_folding) -{ - Shape shape_input{1, 8, 3, 3}; - Shape shape_weights{2, 8, 1, 1}; - Shape shape_norm{2}; - - auto make_function = [shape_input, shape_weights, shape_norm]() { - auto input = std::make_shared(element::f32, shape_input); - auto weights = std::make_shared(element::f32, shape_weights); - - auto a = std::make_shared(element::f32, shape_norm); - auto b = std::make_shared(element::f32, shape_norm); - auto conv = std::make_shared(input, weights, Strides{1, 1}, Strides{1, 1}); - auto out = std::make_shared( - std::make_shared( - conv, std::make_shared(a, conv->get_shape(), AxisSet{0, 2, 3})), - std::make_shared(b, conv->get_shape(), AxisSet{0, 2, 3})); - 
auto f = make_shared(NodeVector{out}, op::ParameterVector{input, weights, a, b}); - return f; - }; - - auto int_f = make_function(); - auto cpu_f = make_function(); - - vector> args{ - {1.25f, 2.25f, 5.25f, 6.25f, -1.25f, -1.25f, 3.25f, -4.25f, 7.25f, 8.25f, -1.25f, - -1.25f, 1.25f, 2.25f, -3.25f, 2.25f, 4.25f, 4.25f, 1.25f, 2.25f, -4.25f, 2.25f, - 4.25f, 4.25f, 0.f, 0.f, -1.f, 0.f, 2.f, 2.f, 0.f, 0.f, 0.f, - 0.f, 2.f, 2.f, 1.25f, 2.25f, 5.25f, 6.25f, 1.25f, 1.25f, 3.25f, 4.25f, - -7.25f, 8.25f, 1.25f, -1.25f, -1.25f, 2.25f, 3.25f, 2.25f, -4.25f, -4.25f, -1.25f, - -2.25f, 4.25f, 2.25f, 4.25f, 4.25f, 0.f, 0.f, 1.f, 0.f, -2.f, 2.f, - 0.f, 0.f, 0.f, 0.f, -2.f, -2.f}, - {1.25f, - 2.25f, - 5.25f, - 6.25f, - -1.25f, - -1.25f, - 3.25f, - -4.25f, - 7.25f, - 8.25f, - -1.25f, - 0.f, - 0.f, - 0.f, - 0.f, - -2.f}, - {-0.9384f, 0.01875f}, - {11.0f, 1.3f}, - }; - - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); -} - -TEST(cpu_fusion, convbias_affine_folding) -{ - Shape shape_input{1, 6, 3, 3}; - Shape shape_weights{3, 6, 1, 1}; - Shape shape_norm{3}; - - auto make_function = [shape_input, shape_weights, shape_norm]() { - auto input = std::make_shared(element::f32, shape_input); - auto weights = std::make_shared(element::f32, shape_weights); - auto bias = std::make_shared(element::f32, Shape{3}); - - auto a = std::make_shared(element::f32, shape_norm); - auto b = std::make_shared(element::f32, shape_norm); - auto conv = std::make_shared(input, weights, Strides{1, 1}, Strides{1, 1}); - auto convbias = - conv + std::make_shared(bias, conv->get_shape(), AxisSet{0, 2, 3}); - auto out = std::make_shared( - std::make_shared( - convbias, std::make_shared(a, conv->get_shape(), AxisSet{0, 2, 3})), - std::make_shared(b, conv->get_shape(), AxisSet{0, 2, 3})); - auto f = - make_shared(NodeVector{out}, op::ParameterVector{input, weights, bias, a, b}); - return f; - 
}; - - auto int_f = make_function(); - auto cpu_f = make_function(); - - test::Uniform rng(20.0f, 300.0f); - vector> args; - for (shared_ptr param : cpu_f->get_parameters()) - { - vector tensor_val(shape_size(param->get_shape())); - rng.initialize(tensor_val); - args.push_back(tensor_val); - } - - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0))); -} - -TEST(cpu_fusion, group_convolution_fusion) -{ - Shape shape_a{1, 32, 2, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{2, 16, 1, 1}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{1, 2, 2, 2}; - - auto a_slice0 = std::make_shared(A, Coordinate{0, 0, 0, 0}, Coordinate{1, 16, 2, 2}); - auto a_slice1 = - std::make_shared(A, Coordinate{0, 16, 0, 0}, Coordinate{1, 32, 2, 2}); - - auto b_slice0 = std::make_shared(B, Coordinate{0, 0, 0, 0}, Coordinate{1, 16, 1, 1}); - auto b_slice1 = std::make_shared(B, Coordinate{1, 0, 0, 0}, Coordinate{2, 16, 1, 1}); - - auto conv_lower = make_shared(a_slice0, - b_slice0, - Strides{1, 1}, - Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, - Strides{1, 1}); - - auto conv_upper = make_shared(a_slice1, - b_slice1, - Strides{1, 1}, - Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, - Strides{1, 1}); - - auto concat = make_shared(NodeVector{conv_lower, conv_upper}, 1); - - auto f = make_shared(NodeVector{concat}, op::ParameterVector{A, B}); - pass::Manager pass_manager; - pass_manager.register_pass("before_group.pdf"); - pass_manager.register_pass(); - pass_manager.register_pass("after_group.pdf"); - pass_manager.run_passes(f); - auto gc = - std::dynamic_pointer_cast(f->get_results().at(0)->get_argument(0)); - ASSERT_TRUE(gc); -} - -TEST(cpu_fusion, group_convolution) -{ - auto backend = runtime::Backend::create("CPU"); - test::Uniform rng(2.0f, 10.0f); - - const size_t GROUPS = 2; - Shape shape_a{1, 32, 2, 
2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_b{2, 16, 1, 1}; - auto B = make_shared(element::f32, shape_b); - Shape shape_r{1, 2, 2, 2}; - auto group_conv = make_shared(A, - B, - Strides{1, 1}, - Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, - Strides{1, 1}, - GROUPS, - shape_r); - - Shape shape_c{1, 16, 2, 2}; - auto C = make_shared(element::f32, shape_c); - Shape shape_d{1, 16, 1, 1}; - auto D = make_shared(element::f32, shape_d); - auto conv_lower = make_shared(C, - D, - Strides{1, 1}, - Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, - Strides{1, 1}); - - auto E = make_shared(element::f32, shape_c); - auto F = make_shared(element::f32, shape_d); - auto conv_upper = make_shared(E, - F, - Strides{1, 1}, - Strides{1, 1}, - CoordinateDiff{0, 0}, - CoordinateDiff{0, 0}, - Strides{1, 1}); - - auto f = make_shared(NodeVector{group_conv, conv_lower, conv_upper}, - op::ParameterVector{A, B, C, D, E, F}); - - auto a_ = rng.initialize(backend->create_tensor(element::f32, shape_a)); - auto b_ = rng.initialize(backend->create_tensor(element::f32, shape_b)); - - vector rv(shape_size(shape_r), 0); - auto group_result = std::dynamic_pointer_cast( - backend->create_tensor(element::f32, shape_r, rv.data())); - - auto av = read_vector(a_); - auto bv = read_vector(b_); - auto c_ = backend->create_tensor(element::f32, shape_c, av.data()); // lower data - auto d_ = backend->create_tensor(element::f32, shape_d, bv.data()); // upper data - - auto e_ = - backend->create_tensor(element::f32, shape_c, av.data() + av.size() / 2); // lower weights - auto f_ = - backend->create_tensor(element::f32, shape_d, bv.data() + bv.size() / 2); // upper weights - - Shape shape_ur{1, 1, 2, 2}; - // allocate a contigious storage for both lower and upper halves. 
- vector erv(shape_size(shape_r), 0); - auto lower_result = std::dynamic_pointer_cast( - backend->create_tensor(element::f32, shape_ur, erv.data())); - auto upper_result = std::dynamic_pointer_cast( - backend->create_tensor(element::f32, shape_ur, erv.data() + erv.size() / 2)); - backend->call_with_validate( - f, {group_result, lower_result, upper_result}, {a_, b_, c_, d_, e_, f_}); - ASSERT_EQ(rv, erv); -} - -//TODO(Pruthvi) enable this test after MKLDNN RNN bug is fixed -#if 0 -TEST(cpu_fusion, rnn_fprop_1_lstm_cell) -{ - auto src_layer = make_shared(element::f32, Shape{10, 100}); - auto src_iter = make_shared(element::f32, Shape{20, 100}); - auto weights_layer = make_shared(element::f32, Shape{400, 100}); - auto weights_iter = make_shared(element::f32, Shape{400, 100}); - auto biases = make_shared(element::f32, Shape{400}); - const int number_of_timesteps = 1; - const int number_of_gates_per_cell = 4; - const int src_seq_length = 1; - const int src_layer_feature_size = 100; - const int feature_size = 100; - const int num_rnn_cell_states = 2; - const int rnn_direction = 1; - const int num_of_rnn_fused_layer = 1; - auto rnn_node = make_shared(src_layer, - src_iter, - weights_layer, - weights_iter, - biases, - number_of_timesteps, - number_of_gates_per_cell, - src_seq_length, - src_layer_feature_size, - feature_size, - num_rnn_cell_states, - rnn_direction, - num_of_rnn_fused_layer); - auto rnn_ht_output = make_shared(rnn_node, 0); - auto rnn_ct_output = make_shared(rnn_node, 1); - - auto func = make_shared( - NodeVector{rnn_ht_output, rnn_ct_output}, - op::ParameterVector{src_layer, src_iter, weights_layer, weights_iter, biases}); - auto backend = runtime::Backend::create("CPU"); - - shared_ptr src_layer_t = - backend->create_tensor(element::f32, src_layer->get_shape()); - shared_ptr src_iter_t = - backend->create_tensor(element::f32, src_iter->get_shape()); - shared_ptr weights_layer_t = - backend->create_tensor(element::f32, weights_layer->get_shape()); - 
shared_ptr weights_iter_t = - backend->create_tensor(element::f32, weights_iter->get_shape()); - shared_ptr biases_t = - backend->create_tensor(element::f32, biases->get_shape()); - shared_ptr result_ht = backend->create_tensor(element::f32, {10, 100}); - shared_ptr result_ct = - backend->create_tensor(element::f32, Shape{20, 100}); - - copy_data(src_layer_t, vector(1000, 1)); - copy_data(src_iter_t, vector(2000, 1)); - copy_data(weights_layer_t, vector(400 * 100, 1)); - copy_data(weights_iter_t, vector(400 * 100, 1)); - copy_data(biases_t, vector(400, 1)); - - backend->call_with_validate( - func, - {result_ht, result_ct}, - {src_layer_t, src_iter_t, weights_layer_t, weights_iter_t, biases_t}); - vector expected_ht(10 * 100, 0.964028f); - vector expected_ct; - for (size_t i = 0; i < 20 * 100; i++) - { - if (i < 1000) - { - expected_ct.push_back(0.964028f); - } - else - { - expected_ct.push_back(2.0f); - } - } - - EXPECT_TRUE(test::all_close(expected_ht, read_vector(result_ht))); - EXPECT_TRUE(test::all_close(expected_ct, read_vector(result_ct))); -} -#endif - -TEST(cpu_fusion, fuse_lstm_cells) -{ - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.register_pass(); - const string json_path = - file_util::path_join(SERIALIZED_ZOO, "mxnet/2rnn_layer_3lstm_cell.json"); - const string json_string = file_util::read_file_to_string(json_path); - stringstream ss(json_string); - shared_ptr func = ngraph::deserialize(ss); - pass_manager.run_passes(func); - auto lstm_ops = get_ops_of_type(func); - EXPECT_EQ(lstm_ops.size(), 6); -} - -TEST(cpu_fusion, fuse_2_layer_rnn) -{ - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.register_pass(); - const string json_path = - file_util::path_join(SERIALIZED_ZOO, "mxnet/2rnn_layer_3lstm_cell.json"); - const string json_string = file_util::read_file_to_string(json_path); - stringstream ss(json_string); - shared_ptr func = ngraph::deserialize(ss); - pass_manager.run_passes(func); - size_t 
count = count_ops_of_type(func); - auto rnn_ops = get_ops_of_type(func); - EXPECT_EQ(rnn_ops.size(), count); - for (auto& node : rnn_ops) - { - EXPECT_EQ(node->get_num_timesteps(), node->get_src_sequence_length()); - EXPECT_EQ(node->get_num_cell_states(), node->get_argument(1)->get_arguments().size()); - } -} - -TEST(cpu_fusion, fuse_1_layer_rnn) -{ - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.register_pass(); - const string json_path = - file_util::path_join(SERIALIZED_ZOO, "mxnet/1rnn_layer_3lstm_cell.json"); - const string json_string = file_util::read_file_to_string(json_path); - stringstream ss(json_string); - shared_ptr func = ngraph::deserialize(ss); - pass_manager.run_passes(func); - size_t count = count_ops_of_type(func); - auto rnn_ops = get_ops_of_type(func); - EXPECT_EQ(rnn_ops.size(), 1); - EXPECT_EQ(rnn_ops.size(), count); - for (auto& node : rnn_ops) - { - EXPECT_EQ(node->get_num_timesteps(), node->get_src_sequence_length()); - EXPECT_EQ(node->get_num_cell_states(), node->get_argument(1)->get_arguments().size()); - } -} - -static std::shared_ptr make_function(const std::string& file_name) -{ - const string json_path = file_util::path_join(SERIALIZED_ZOO, file_name); - const string json_string = file_util::read_file_to_string(json_path); - stringstream ss(json_string); - shared_ptr func = ngraph::deserialize(ss); - return func; -} - -TEST(cpu_fusion, rnn_fusion_inter_vs_cpu_1lstm_cell) -{ - const std::string file_name("mxnet/1_lstm_cell_forward.json"); - auto cpu_f = make_function(file_name); - auto int_f = make_function(file_name); - test::Uniform rng(0.0f, 1.0f); - vector> args; - - for (shared_ptr param : int_f->get_parameters()) - { - vector tensor_val(shape_size(param->get_shape())); - rng.initialize(tensor_val); - args.push_back(tensor_val); - } - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - for (size_t i = 0; i < cpu_results.size(); i++) - { - 
EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); - } -} - -TEST(cpu_fusion, rnn_fusion_inter_vs_cpu_1rnn_layer_3lstm_cell) -{ - const std::string file_name("mxnet/1rnn_layer_3lstm_cell.json"); - auto cpu_f = make_function(file_name); - auto int_f = make_function(file_name); - test::Uniform rng(0.0f, 1.0f); - vector> args; - - for (shared_ptr param : int_f->get_parameters()) - { - vector tensor_val(shape_size(param->get_shape())); - rng.initialize(tensor_val); - args.push_back(tensor_val); - } - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - for (size_t i = 0; i < cpu_results.size(); i++) - { - EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); - } -} - -TEST(cpu_fusion, rnn_fusion_inter_vs_cpu_2rnn_layer_3lstm_cell) -{ - const std::string file_name("mxnet/2rnn_layer_3lstm_cell.json"); - auto cpu_f = make_function(file_name); - auto int_f = make_function(file_name); - test::Uniform rng(0.0f, 1.0f); - vector> args; - - for (shared_ptr param : int_f->get_parameters()) - { - vector tensor_val(shape_size(param->get_shape())); - rng.initialize(tensor_val); - args.push_back(tensor_val); - } - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - for (size_t i = 0; i < cpu_results.size(); i++) - { - EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); - } -} - -#if 0 - -TEST(cpu_fusion, loop_kernel_fusion_multiple_groups_pruned) -{ - auto make_function = []() -> std::shared_ptr { - Shape shape{}; - auto a = make_shared(element::f32, shape); - auto b = make_shared(element::f32, shape); - auto c = make_shared(element::f32, shape); - auto add_ab = a + b; - auto add_abs = std::make_shared(add_ab); - auto abs_neg = std::make_shared(add_abs); - auto sub_c_neg = c - abs_neg; - - auto d = make_shared(element::f32, shape); - auto d_abs = std::make_shared(d); - auto 
add_d = d_abs + add_ab; - auto neg_d = std::make_shared(add_d); - - auto mul_cd = neg_d * sub_c_neg; - auto f = - std::make_shared(ngraph::NodeVector{mul_cd}, op::ParameterVector{a, b, c, d}); - - return f; - }; - - pass::Manager pass_manager; - pass_manager.register_pass(3); - auto cpu_f = make_function(); - auto int_f = make_function(); - pass_manager.run_passes(cpu_f); - test::Uniform rng(-100.0f, 100.0f); - vector> args; - - size_t lkn = count_ops_of_type(cpu_f); - ASSERT_GT(lkn, 0); - - for (shared_ptr param : cpu_f->get_parameters()) - { - vector tensor_val(shape_size(param->get_shape())); - rng.initialize(tensor_val); - args.push_back(tensor_val); - } - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - for (size_t i = 0; i < cpu_results.size(); i++) - { - EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); - } -} - -TEST(cpu_fusion, loop_kernel_fusion_bounded_relu) -{ - auto make_function = []() -> std::shared_ptr { - Shape shape{}; - auto a = make_shared(element::f32, shape); - auto relu = make_shared(a); - auto upper_bound = - op::Constant::create(element::f32, shape, std::vector{6.0f}); - auto minn = make_shared(relu, upper_bound); - auto absn = make_shared(minn); - auto negn = std::make_shared(absn); - - auto f = std::make_shared(ngraph::NodeVector{negn}, op::ParameterVector{a}); - - return f; - }; - - pass::Manager pass_manager; - pass_manager.register_pass("before_relu_fusion.pdf"); - pass_manager.register_pass(3); - pass_manager.register_pass("after_relu_fusion.pdf"); - auto cpu_f = make_function(); - auto int_f = make_function(); - pass_manager.run_passes(cpu_f); - test::Uniform rng(-100.0f, 100.0f); - vector> args; - - size_t lkn = count_ops_of_type(cpu_f); - ASSERT_GT(lkn, 0); - - for (shared_ptr param : cpu_f->get_parameters()) - { - vector tensor_val(shape_size(param->get_shape())); - rng.initialize(tensor_val); - args.push_back(tensor_val); - } - auto 
int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - for (size_t i = 0; i < cpu_results.size(); i++) - { - EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); - } -} - -TEST(cpu_fusion, loop_kernel_fusion_multiple_groups) -{ - auto make_function = []() -> std::shared_ptr { - Shape shape{}; - auto a = make_shared(element::f32, shape); - auto b = make_shared(element::f32, shape); - auto c = make_shared(element::f32, shape); - auto add_ab = a + b; - auto add_abs = std::make_shared(add_ab); - auto abs_neg = std::make_shared(add_abs); - auto sub_c_neg = c - abs_neg; - - auto d = make_shared(element::f32, shape); - auto d_abs = std::make_shared(d); - auto add_d = d_abs + add_ab; - auto neg_d = std::make_shared(add_d); - - auto mul_cd = neg_d * sub_c_neg; - auto f = - std::make_shared(ngraph::NodeVector{mul_cd}, op::ParameterVector{a, b, c, d}); - - return f; - }; - - pass::Manager pass_manager; - pass_manager.register_pass(2); - auto cpu_f = make_function(); - auto int_f = make_function(); - pass_manager.run_passes(cpu_f); - test::Uniform rng(-100.0f, 100.0f); - vector> args; - - size_t lkn = count_ops_of_type(cpu_f); - ASSERT_GT(lkn, 0); - - for (shared_ptr param : cpu_f->get_parameters()) - { - vector tensor_val(shape_size(param->get_shape())); - rng.initialize(tensor_val); - args.push_back(tensor_val); - } - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - for (size_t i = 0; i < cpu_results.size(); i++) - { - EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); - } -} - -TEST(cpu_fusion, loop_kernel_fusion_one_group) -{ - auto make_function = []() -> std::shared_ptr { - Shape shape{}; - auto a = make_shared(element::f32, shape); - auto b = make_shared(element::f32, shape); - auto c = make_shared(element::f32, shape); - auto add_ab = a + b; - auto add_abs = std::make_shared(add_ab); - auto 
abs_neg = std::make_shared(add_abs); - auto sub_c_neg = c - abs_neg; - auto d = make_shared(element::f32, shape); - auto add_d = sub_c_neg + d; - auto abs_add_d = std::make_shared(add_d); - auto e = make_shared(element::f32, shape); - auto add_e = e + abs_add_d; - auto neg_e = std::make_shared(add_e); - - auto f = std::make_shared(ngraph::NodeVector{neg_e}, - op::ParameterVector{a, b, c, d, e}); - - return f; - - }; - - pass::Manager pass_manager; - pass_manager.register_pass(2); - auto cpu_f = make_function(); - auto int_f = make_function(); - pass_manager.run_passes(cpu_f); - test::Uniform rng(-100.0f, 100.0f); - vector> args; - - size_t lkn = count_ops_of_type(cpu_f); - ASSERT_GT(lkn, 0); - - for (shared_ptr param : cpu_f->get_parameters()) - { - vector tensor_val(shape_size(param->get_shape())); - rng.initialize(tensor_val); - args.push_back(tensor_val); - } - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - for (size_t i = 0; i < cpu_results.size(); i++) - { - EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); - } -} - -#endif - -TEST(cpu_fusion, sigmoid_multiply_fusion) -{ - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.register_pass(); - const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/3_lstm_cell_forward.json"); - const string json_string = file_util::read_file_to_string(json_path); - stringstream ss(json_string); - shared_ptr func = ngraph::deserialize(ss); - pass_manager.run_passes(func); - size_t ccg = count_ops_of_type(func); - ASSERT_EQ(ccg, 18); -} - -void sigmoid_multiply_fusion_forward_compute(runtime::Backend* backend, - const op::ParameterVector& input_params, - const vector>& input_data, - const vector& input_shapes, - const Shape& result_shape, - shared_ptr input_0_node, - shared_ptr input_1_node, - const vector& expected) -{ - shared_ptr result_tensor = backend->create_tensor(element::f32, result_shape); 
- - vector> input_tensors; - for (int i = 0; i < input_params.size(); ++i) - { - input_tensors.push_back(backend->create_tensor(element::f32, input_shapes[i])); - copy_data(input_tensors[i], input_data[i]); - } - - auto mul_node = input_0_node * input_1_node; - auto func = make_shared(mul_node, input_params); - backend->call_with_validate(func, {result_tensor}, input_tensors); - EXPECT_TRUE(test::all_close(read_vector(result_tensor), expected)); -} - -TEST(cpu_fusion, sigmoid_multiply_fusion_forward) -{ - auto backend = runtime::Backend::create("CPU"); - - Shape data_shape{1, 1, 2, 2}; - Shape const_shape{1}; - - vector input_0_data{1.f, 2.f, 3.f, 4.f}; - vector input_1_data{1.2f, 2.3f, 3.5f, 4.7f}; - vector const_data{1.2f}; - { - auto input_0_param = make_shared(element::f32, data_shape); - auto input_1_param = make_shared(element::f32, data_shape); - auto input_2_param = make_shared(element::f32, data_shape); - auto sigmoid_0 = make_shared(input_0_param); - auto sigmoid_1 = make_shared(input_1_param, input_2_param); - vector expected{1.60833f, 3.78743f, 6.19173f, 8.54352f}; - op::ParameterVector input_params{input_0_param, input_1_param, input_2_param}; - vector> input_data{input_0_data, input_0_data, input_1_data}; - vector input_shapes{data_shape, data_shape, data_shape}; - sigmoid_multiply_fusion_forward_compute(backend.get(), - input_params, - input_data, - input_shapes, - data_shape, - sigmoid_0, - sigmoid_1, - expected); - } - { - auto input_0_param = make_shared(element::f32, data_shape); - auto input_1_param = make_shared(element::f32, const_shape); - auto sigmoid_0 = make_shared(input_1_param, data_shape, AxisSet{1, 2, 3}); - auto sigmoid_1 = make_shared(input_0_param); - vector expected{0.87727f, 1.05696f, 1.14309f, 1.17842f}; - op::ParameterVector input_params{input_0_param, input_1_param}; - vector> input_data{input_0_data, const_data}; - vector input_shapes{data_shape, const_shape}; - sigmoid_multiply_fusion_forward_compute(backend.get(), - 
input_params, - input_data, - input_shapes, - data_shape, - sigmoid_0, - sigmoid_1, - expected); - } - { - auto input_0_param = make_shared(element::f32, data_shape); - auto input_1_param = make_shared(element::f32, const_shape); - auto sigmoid_0 = make_shared(input_0_param); - auto sigmoid_1 = make_shared(input_1_param, data_shape, AxisSet{1, 2, 3}); - vector expected{0.87727f, 1.05696f, 1.14309f, 1.17842f}; - op::ParameterVector input_params{input_0_param, input_1_param}; - vector> input_data{input_0_data, const_data}; - vector input_shapes{data_shape, const_shape}; - sigmoid_multiply_fusion_forward_compute(backend.get(), - input_params, - input_data, - input_shapes, - data_shape, - sigmoid_0, - sigmoid_1, - expected); - } - { - auto input_0_param = make_shared(element::f32, data_shape); - auto input_1_param = make_shared(element::f32, data_shape); - auto sigmoid_0 = make_shared(input_0_param); - auto sigmoid_1 = make_shared(input_1_param); - vector expected{0.561837f, 0.800536f, 0.924652f, 0.973163f}; - op::ParameterVector input_params{input_0_param, input_1_param}; - vector> input_data{input_0_data, input_1_data}; - vector input_shapes{data_shape, data_shape}; - sigmoid_multiply_fusion_forward_compute(backend.get(), - input_params, - input_data, - input_shapes, - data_shape, - sigmoid_0, - sigmoid_1, - expected); - } - { - auto input_0_param = make_shared(element::f32, data_shape); - auto input_1_param = make_shared(element::f32, data_shape); - auto sigmoid_0 = make_shared(input_0_param); - auto sigmoid_1 = make_shared(input_1_param); - vector expected{0.60945f, 0.863266f, 0.950838f, 0.981851f}; - op::ParameterVector input_params{input_0_param, input_1_param}; - vector> input_data{input_0_data, input_1_data}; - vector input_shapes{data_shape, data_shape}; - sigmoid_multiply_fusion_forward_compute(backend.get(), - input_params, - input_data, - input_shapes, - data_shape, - sigmoid_0, - sigmoid_1, - expected); - } - { - auto input_0_param = 
make_shared(element::f32, data_shape); - auto input_1_param = make_shared(element::f32, data_shape); - auto sigmoid_0 = make_shared(input_0_param); - auto sigmoid_1 = make_shared(input_1_param); - vector expected{0.585304f, 0.876182f, 0.965887f, 0.990322f}; - op::ParameterVector input_params{input_0_param, input_1_param}; - vector> input_data{input_0_data, input_1_data}; - vector input_shapes{data_shape, data_shape}; - sigmoid_multiply_fusion_forward_compute(backend.get(), - input_params, - input_data, - input_shapes, - data_shape, - sigmoid_0, - sigmoid_1, - expected); - } - { - auto input_0_param = make_shared(element::f32, data_shape); - auto input_1_param = make_shared(element::f32, data_shape); - auto sigmoid_0 = make_shared(input_0_param); - auto sigmoid_1 = make_shared(input_1_param); - vector expected{0.634907f, 0.94484f, 0.993242f, 0.999164f}; - op::ParameterVector input_params{input_0_param, input_1_param}; - vector> input_data{input_0_data, input_1_data}; - vector input_shapes{data_shape, data_shape}; - sigmoid_multiply_fusion_forward_compute(backend.get(), - input_params, - input_data, - input_shapes, - data_shape, - sigmoid_0, - sigmoid_1, - expected); - } -} - -void sigmoid_multiply_fusion_backward_compute(runtime::Backend* backend, - const op::ParameterVector& input_params, - const vector>& input_data, - const vector& input_shapes, - const vector delta_data, - const Shape& delta_shape, - const Shape& d_input_0_shape, - const Shape& d_input_1_shape, - shared_ptr input_0_node, - shared_ptr input_1_node, - shared_ptr input_0_adjoint, - shared_ptr input_1_adjoint, - const vector& expected_0, - const vector& expected_1) -{ - vector> input_tensors; - for (int i = 0; i < input_params.size(); ++i) - { - input_tensors.push_back(backend->create_tensor(element::f32, input_shapes[i])); - copy_data(input_tensors[i], input_data[i]); - } - - auto delta_param = make_shared(element::f32, delta_shape); - shared_ptr delta_tensor = backend->create_tensor(element::f32, 
delta_shape); - copy_data(delta_tensor, delta_data); - - op::ParameterVector back_params(input_params); - back_params.push_back(delta_param); - input_tensors.push_back(delta_tensor); - - shared_ptr d_input_0_tensor = - backend->create_tensor(element::f32, d_input_0_shape); - shared_ptr d_input_1_tensor = - backend->create_tensor(element::f32, d_input_1_shape); - - using FunctionType = op::SigmoidMultiply::FunctionType; - auto input_0_type = op::SigmoidMultiply::identify_node_type(input_0_node); - auto input_1_type = op::SigmoidMultiply::identify_node_type(input_1_node); - // for Identity functions, we use the node itself, otherwise use its input - // where we will apply the function of input node - auto input_0_alt = - (input_0_type == FunctionType::Identity) ? input_0_node : input_0_node->get_argument(0); - auto input_1_alt = - (input_1_type == FunctionType::Identity) ? input_1_node : input_1_node->get_argument(0); - auto sigmoid_mul = - make_shared(input_0_alt, input_1_alt, input_0_type, input_1_type); - - ngraph::autodiff::Adjoints adjoints(NodeVector{sigmoid_mul}, NodeVector{delta_param}); - auto d_input_0 = adjoints.backprop_node(input_0_adjoint); - auto d_input_1 = adjoints.backprop_node(input_1_adjoint); - auto df = make_shared(NodeVector{d_input_0, d_input_1}, back_params); - backend->call_with_validate(df, {d_input_0_tensor, d_input_1_tensor}, input_tensors); - EXPECT_TRUE(test::all_close(read_vector(d_input_0_tensor), expected_0)); - EXPECT_TRUE(test::all_close(read_vector(d_input_1_tensor), expected_1)); -} - -TEST(cpu_fusion, sigmoid_multiply_fusion_backward) -{ - auto backend = runtime::Backend::create("CPU"); - - Shape data_shape{1, 1, 2, 2}; - Shape const_shape{1}; - - vector input_0_data{1.f, 2.f, 3.f, 4.f}; - vector input_1_data{1.2f, 2.2f, 3.2f, 4.2f}; - vector const_data{1.2f}; - vector delta_data(shape_size(data_shape), 20.0f); - - { - auto input_0_param = make_shared(element::f32, data_shape); - auto input_1_param = make_shared(element::f32, 
data_shape); - auto input_2_param = make_shared(element::f32, data_shape); - auto sigmoid_0 = make_shared(input_0_param); - auto sigmoid_1 = make_shared(input_1_param, input_2_param); - vector expected_0{8.65093f, 8.81946f, 5.60191f, 2.89668f}; - vector expected_1{14.6212f, 17.6159f, 19.0515f, 19.6403f}; - op::ParameterVector input_params{input_0_param, input_1_param, input_2_param}; - vector> input_data{input_0_data, input_0_data, input_1_data}; - vector input_shapes{data_shape, data_shape, data_shape}; - sigmoid_multiply_fusion_backward_compute(backend.get(), - input_params, - input_data, - input_shapes, - delta_data, - data_shape, - data_shape, - data_shape, - sigmoid_0, - sigmoid_1, - input_0_param, - sigmoid_1, - expected_0, - expected_1); - } - { - auto input_0_param = make_shared(element::f32, data_shape); - auto input_1_param = make_shared(element::f32, const_shape); - auto sigmoid_0 = make_shared(input_1_param, data_shape, AxisSet{1, 2, 3}); - auto sigmoid_1 = make_shared(input_0_param); - vector expected_0{15.2319f, 19.2806f, 19.9011f, 19.9866f}; - vector expected_1{10.0794f, 1.69562f, 0.236785f, 0.0321828f}; - op::ParameterVector input_params{input_0_param, input_1_param}; - vector> input_data{input_0_data, const_data}; - vector input_shapes{data_shape, const_shape}; - sigmoid_multiply_fusion_backward_compute(backend.get(), - input_params, - input_data, - input_shapes, - delta_data, - data_shape, - data_shape, - data_shape, - sigmoid_0, - sigmoid_1, - sigmoid_0, - input_0_param, - expected_0, - expected_1); - } - { - auto input_0_param = make_shared(element::f32, data_shape); - auto input_1_param = make_shared(element::f32, const_shape); - auto sigmoid_0 = make_shared(input_0_param); - auto sigmoid_1 = make_shared(input_1_param, data_shape, AxisSet{1, 2, 3}); - vector expected_0{10.0794f, 1.69562f, 0.236785f, 0.0321828f}; - vector expected_1{15.2319f, 19.2806f, 19.9011f, 19.9866f}; - op::ParameterVector input_params{input_0_param, input_1_param}; - 
vector> input_data{input_0_data, const_data}; - vector input_shapes{data_shape, const_shape}; - sigmoid_multiply_fusion_backward_compute(backend.get(), - input_params, - input_data, - input_shapes, - delta_data, - data_shape, - data_shape, - data_shape, - sigmoid_0, - sigmoid_1, - input_0_param, - sigmoid_1, - expected_0, - expected_1); - } - { - auto input_0_param = make_shared(element::f32, data_shape); - auto input_1_param = make_shared(element::f32, data_shape); - auto sigmoid_0 = make_shared(input_0_param); - auto sigmoid_1 = make_shared(input_1_param); - vector expected_0{3.02202f, 1.89041f, 0.868146f, 0.348035f}; - vector expected_1{2.60102f, 1.58192f, 0.716941f, 0.285879f}; - op::ParameterVector input_params{input_0_param, input_1_param}; - vector> input_data{input_0_data, input_1_data}; - vector input_shapes{data_shape, data_shape}; - sigmoid_multiply_fusion_backward_compute(backend.get(), - input_params, - input_data, - input_shapes, - delta_data, - data_shape, - data_shape, - data_shape, - sigmoid_0, - sigmoid_1, - input_0_param, - input_1_param, - expected_0, - expected_1); - } - { - auto input_0_param = make_shared(element::f32, data_shape); - auto input_1_param = make_shared(element::f32, data_shape); - auto sigmoid_0 = make_shared(input_0_param); - auto sigmoid_1 = make_shared(input_1_param); - vector expected_0{3.27813f, 2.04894f, 0.900536f, 0.353095f}; - vector expected_1{4.45975f, 0.84425f, 0.126201f, 0.0176579f}; - op::ParameterVector input_params{input_0_param, input_1_param}; - vector> input_data{input_0_data, input_1_data}; - vector input_shapes{data_shape, data_shape}; - sigmoid_multiply_fusion_backward_compute(backend.get(), - input_params, - input_data, - input_shapes, - delta_data, - data_shape, - data_shape, - data_shape, - sigmoid_0, - sigmoid_1, - input_0_param, - input_1_param, - expected_0, - expected_1); - } - { - auto input_0_param = make_shared(element::f32, data_shape); - auto input_1_param = make_shared(element::f32, data_shape); 
- auto sigmoid_0 = make_shared(input_0_param); - auto sigmoid_1 = make_shared(input_1_param); - vector expected_0{6.45521f, 1.27207f, 0.189593f, 0.0264228f}; - vector expected_1{2.70967f, 1.7314f, 0.748913f, 0.29092f}; - op::ParameterVector input_params{input_0_param, input_1_param}; - vector> input_data{input_0_data, input_1_data}; - vector input_shapes{data_shape, data_shape}; - sigmoid_multiply_fusion_backward_compute(backend.get(), - input_params, - input_data, - input_shapes, - delta_data, - data_shape, - data_shape, - data_shape, - sigmoid_0, - sigmoid_1, - input_0_param, - input_1_param, - expected_0, - expected_1); - } - { - auto input_0_param = make_shared(element::f32, data_shape); - auto input_1_param = make_shared(element::f32, data_shape); - auto sigmoid_0 = make_shared(input_0_param); - auto sigmoid_1 = make_shared(input_1_param); - vector expected_0{7.00227f, 1.37874f, 0.196666f, 0.026807f}; - vector expected_1{4.64603f, 0.924027f, 0.131829f, 0.0179692f}; - op::ParameterVector input_params{input_0_param, input_1_param}; - vector> input_data{input_0_data, input_1_data}; - vector input_shapes{data_shape, data_shape}; - sigmoid_multiply_fusion_backward_compute(backend.get(), - input_params, - input_data, - input_shapes, - delta_data, - data_shape, - data_shape, - data_shape, - sigmoid_0, - sigmoid_1, - input_0_param, - input_1_param, - expected_0, - expected_1); - } -} - -TEST(cpu_fusion, fuse_batch_dot) -{ - pass::Manager pass_manager; - pass_manager.register_pass(); - const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/batch_dot_3.json"); - const string json_string = file_util::read_file_to_string(json_path); - stringstream ss(json_string); - shared_ptr func = ngraph::deserialize(ss); - pass_manager.run_passes(func); - size_t ccg = count_ops_of_type(func); - ASSERT_EQ(ccg, 1); -} - -TEST(cpu_fusion, fuse_batch_dot_forward) -{ - pass::Manager pass_manager; - pass_manager.register_pass(); - - const std::string 
file_name("mxnet/batch_dot_3.json"); - auto cpu_f = make_function(file_name); - auto int_f = make_function(file_name); - pass_manager.run_passes(cpu_f); - test::Uniform rng(0.0f, 1.0f); - vector> args; - - for (shared_ptr param : int_f->get_parameters()) - { - vector tensor_val(shape_size(param->get_shape())); - rng.initialize(tensor_val); - args.push_back(tensor_val); - } - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - for (size_t i = 0; i < int_results.size(); i++) - { - EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); - } -} - -TEST(cpu_fusion, fuse_rnn_across_layer) -{ - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.register_pass(); - pass_manager.register_pass(); - pass_manager.register_pass(); - const string json_path = - file_util::path_join(SERIALIZED_ZOO, "mxnet/2rnn_layer_1timestep.json"); - const string json_string = file_util::read_file_to_string(json_path); - stringstream ss(json_string); - shared_ptr func = ngraph::deserialize(ss); - pass_manager.run_passes(func); - size_t ref_rnn_count = 1; - auto rnn_count = count_ops_of_type(func); - EXPECT_EQ(ref_rnn_count, rnn_count); -} - -TEST(cpu_fusion, fuse_rnn_across_2layer_1timestep) -{ - const std::string file_name("mxnet/2rnn_layer_1timestep.json"); - auto cpu_f = make_function(file_name); - auto int_f = make_function(file_name); - test::Uniform rng(0.0f, 1.0f); - vector> args; - - for (shared_ptr param : int_f->get_parameters()) - { - vector tensor_val(shape_size(param->get_shape())); - rng.initialize(tensor_val); - args.push_back(tensor_val); - } - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - - // TODO (pruthvi): Enable this after fixing failing - // mxnet rnn unit tests - // EXPECT_EQ(1, count_ops_of_type(cpu_f)); - for (size_t i = 0; i < cpu_results.size(); i++) - { - 
EXPECT_TRUE(test::all_close(cpu_results.at(1), int_results.at(1), 1.0e-4f, 1.0e-4f)); - } -} - -static void check_bounded_relu(Shape param_shape, float constant_val) -{ - auto make_function = [](Shape input_shape, float alpha_val) { - auto relu_input = std::make_shared(element::f32, input_shape); - auto relu = std::make_shared(relu_input); - auto alpha = op::Constant::create( - element::f32, input_shape, std::vector(1.0f, alpha_val)); - auto min = std::make_shared(relu, alpha); - auto f = make_shared(NodeVector{min}, op::ParameterVector{relu_input}); - return f; - }; - - auto cpu_f = make_function(param_shape, constant_val); - auto int_f = make_function(param_shape, constant_val); - test::Uniform rng(-10.0f, 10.0f); - vector> args; - - for (shared_ptr param : int_f->get_parameters()) - { - vector tensor_val(shape_size(param->get_shape())); - rng.initialize(tensor_val); - args.push_back(tensor_val); - } - auto int_results = execute(int_f, args, "INTERPRETER"); - auto cpu_results = execute(cpu_f, args, "CPU"); - - EXPECT_EQ(1, count_ops_of_type(cpu_f)); - EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0), 1.0e-4f, 1.0e-4f)); -} - -TEST(cpu_fusion, fuse_bounded_relu_inter_vs_cpu) -{ - check_bounded_relu(Shape{4, 3, 2, 2}, 6.0f); - check_bounded_relu(Shape{4, 3}, 4.0f); - check_bounded_relu(Shape{4, 3, 2}, 2.0f); -} - -TEST(cpu_fusion, dot_batch_forward) -{ - const Shape shape_a{2, 3, 2}; - const Shape shape_b{2, 3}; - - auto generate_func = [&shape_a, &shape_b]() -> shared_ptr { - auto a = make_shared(element::f32, shape_a); - auto b = make_shared(element::f32, shape_b); - auto dot = make_shared(a, b); - return make_shared(dot, op::ParameterVector{a, b}); - }; - shared_ptr cpu_func = generate_func(); - shared_ptr int_func = generate_func(); - - test::Uniform rng(0.0f, 1.0f); - vector> args; - for (shared_ptr param : int_func->get_parameters()) - { - vector tensor_val(shape_size(param->get_shape())); - rng.initialize(tensor_val); - 
args.push_back(tensor_val); - } - - auto int_results = execute(int_func, args, "INTERPRETER"); - auto cpu_results = execute(cpu_func, args, "CPU"); - for (size_t i = 0; i < cpu_results.size(); i++) - { - EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); - } -} -static std::shared_ptr - create_rnn_input_linear_transformation_function(size_t num_timesteps, bool data_is_4d = false) -{ - auto W = std::make_shared(element::f32, Shape{400, 50}); - auto bias = std::make_shared(element::f32, Shape{400}); - op::ParameterVector params{W, bias}; - auto create_graph = [&]() -> std::shared_ptr { - - auto data_param = (data_is_4d) - ? std::make_shared(element::f32, Shape{2, 5, 1, 50}) - : std::make_shared(element::f32, Shape{10, 1, 50}); - params.push_back(data_param); - auto reshape_axis_order = data_is_4d ? AxisVector{0, 1, 2, 3} : AxisVector{0, 1, 2}; - auto data_param_reshape = - std::make_shared(data_param, reshape_axis_order, Shape{10, 50}); - auto W_reshape = std::make_shared(W, AxisVector{1, 0}, Shape{50, 400}); - auto dot = std::make_shared(data_param_reshape, W_reshape); - auto bias_broadcast = make_shared(bias, dot->get_shape(), AxisSet{0}); - auto add_bias = std::make_shared(dot, bias_broadcast); - return add_bias; - - }; - - NodeVector graph_nodes; - for (size_t i = 0; i < num_timesteps; i++) - { - graph_nodes.push_back(create_graph()); - } - auto concat = std::make_shared(graph_nodes, 0); - return make_shared(NodeVector{concat}, params); -} - -TEST(cpu_fusion, fuse_rnn_input_across_time_steps) -{ - auto func = create_rnn_input_linear_transformation_function(10); - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.register_pass(); - pass_manager.run_passes(func); - size_t ref_matmulbias_count = 1; - auto matmulbias_count = count_ops_of_type(func); - EXPECT_EQ(ref_matmulbias_count, matmulbias_count); -} - -TEST(cpu_fusion, fuse_rnn_input_across_time_steps_4d_data) -{ - auto func = 
create_rnn_input_linear_transformation_function(10, true); - pass::Manager pass_manager; - pass_manager.register_pass(); - pass_manager.register_pass(); - pass_manager.run_passes(func); - size_t ref_matmulbias_count = 10; // no CPURnnMatFusion transformations - auto matmulbias_count = count_ops_of_type(func); - EXPECT_EQ(ref_matmulbias_count, matmulbias_count); -} - -TEST(cpu_fusion, rnn_input_fusion_inter_vs_cpu) -{ - shared_ptr cpu_func = create_rnn_input_linear_transformation_function(10); - shared_ptr int_func = create_rnn_input_linear_transformation_function(10); - - test::Uniform rng(-10.0f, 10.0f); - vector> args; - for (shared_ptr param : int_func->get_parameters()) - { - vector tensor_val(shape_size(param->get_shape())); - rng.initialize(tensor_val); - args.push_back(tensor_val); - } - - auto int_results = execute(int_func, args, "INTERPRETER"); - auto cpu_results = execute(cpu_func, args, "CPU"); - for (size_t i = 0; i < cpu_results.size(); i++) - { - EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f)); - } -} From 23f68d06235fb948b82d15be1b8ad8fb72817513 Mon Sep 17 00:00:00 2001 From: Scott Cyphers Date: Tue, 13 Nov 2018 13:30:29 -0800 Subject: [PATCH 04/10] BatchNorm Python API changes. 
--- python/pyngraph/ops/batch_norm.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/pyngraph/ops/batch_norm.cpp b/python/pyngraph/ops/batch_norm.cpp index 11fb21c2e07..15bec7f6ae5 100644 --- a/python/pyngraph/ops/batch_norm.cpp +++ b/python/pyngraph/ops/batch_norm.cpp @@ -30,9 +30,9 @@ void regclass_pyngraph_op_BatchNormTraining(py::module m) batch_norm_training(m, "BatchNormTraining"); batch_norm_training.doc() = "ngraph.impl.op.BatchNormTraining wraps ngraph::op::BatchNormTraining"; - batch_norm_training.def(py::init&, + batch_norm_training.def(py::init&, const std::shared_ptr&, + double, const std::shared_ptr&>()); } @@ -45,11 +45,11 @@ void regclass_pyngraph_op_BatchNormInference(py::module m) batch_norm_inference.doc() = "ngraph.impl.op.BatchNormInference wraps ngraph::op::BatchNormInference"; - batch_norm_inference.def(py::init&, + batch_norm_inference.def(py::init&, const std::shared_ptr&, const std::shared_ptr&, const std::shared_ptr&, + double, const std::shared_ptr&>()); } @@ -61,11 +61,11 @@ void regclass_pyngraph_op_BatchNormTrainingBackprop(py::module m) batch_norm_training_backprop(m, "BatchNormTrainingBackprop"); batch_norm_training_backprop.doc() = "ngraph.impl.op.BatchNormTrainingBackprop wraps ngraph::op::BatchNormTrainingBackprop"; - batch_norm_training_backprop.def(py::init&, + batch_norm_training_backprop.def(py::init&, const std::shared_ptr&, const std::shared_ptr&, const std::shared_ptr&, const std::shared_ptr&, + double, const std::shared_ptr&>()); } From cc0295a25dcf6195bae5e588677193cd2a1e533e Mon Sep 17 00:00:00 2001 From: Scott Cyphers Date: Tue, 13 Nov 2018 14:39:42 -0800 Subject: [PATCH 05/10] Fix python signatures --- python/pyngraph/ops/batch_norm.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/pyngraph/ops/batch_norm.cpp b/python/pyngraph/ops/batch_norm.cpp index 15bec7f6ae5..df600f0b173 100644 --- a/python/pyngraph/ops/batch_norm.cpp +++ 
b/python/pyngraph/ops/batch_norm.cpp @@ -32,8 +32,8 @@ void regclass_pyngraph_op_BatchNormTraining(py::module m) "ngraph.impl.op.BatchNormTraining wraps ngraph::op::BatchNormTraining"; batch_norm_training.def(py::init&, const std::shared_ptr&, - double, - const std::shared_ptr&>()); + const std::shared_ptr&, + double>()); } void regclass_pyngraph_op_BatchNormInference(py::module m) @@ -49,8 +49,8 @@ void regclass_pyngraph_op_BatchNormInference(py::module m) const std::shared_ptr&, const std::shared_ptr&, const std::shared_ptr&, - double, - const std::shared_ptr&>()); + const std::shared_ptr&, + double>()); } void regclass_pyngraph_op_BatchNormTrainingBackprop(py::module m) @@ -66,6 +66,6 @@ void regclass_pyngraph_op_BatchNormTrainingBackprop(py::module m) const std::shared_ptr&, const std::shared_ptr&, const std::shared_ptr&, - double, - const std::shared_ptr&>()); + const std::shared_ptr&, + double>()); } From 78243d68b1608c93779d5ed8283ff65f0f76250f Mon Sep 17 00:00:00 2001 From: Scott Cyphers Date: Tue, 13 Nov 2018 15:22:59 -0800 Subject: [PATCH 06/10] Fix order --- python/ngraph/ops.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/ngraph/ops.py b/python/ngraph/ops.py index 0b6bb262c18..9c08b14bd0b 100644 --- a/python/ngraph/ops.py +++ b/python/ngraph/ops.py @@ -913,20 +913,20 @@ def reverse(node, reversed_axes, name=None): # type: (Node, List[int], str) -> @nameable_op -def batch_norm(eps, # type: float +def batch_norm(data, # type: Node gamma, # type: Node beta, # type: Node - data, # type: Node mean=None, # type: Node variance=None, # type: Node name=None, # type: str + eps=1e-5, # type: float ): # type: (...) 
-> Node """Return batch normalization node.""" if mean is None and variance is None: - return BatchNormTraining(eps, gamma, beta, data) + return BatchNormTraining(data, gamma, beta, eps) else: - return BatchNormInference(eps, gamma, beta, data, mean, variance) + return BatchNormInference(data, gamma, beta, mean, variance, eps) @nameable_op From 7175c3a55d1caf5a9694799d8474bf9f53ea296d Mon Sep 17 00:00:00 2001 From: Scott Cyphers Date: Tue, 13 Nov 2018 15:41:58 -0800 Subject: [PATCH 07/10] onnx batch_norm --- src/ngraph/frontend/onnx_import/op/batch_norm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ngraph/frontend/onnx_import/op/batch_norm.cpp b/src/ngraph/frontend/onnx_import/op/batch_norm.cpp index 11338882026..0bf423bd9d0 100644 --- a/src/ngraph/frontend/onnx_import/op/batch_norm.cpp +++ b/src/ngraph/frontend/onnx_import/op/batch_norm.cpp @@ -54,11 +54,11 @@ namespace ngraph mean = inputs.at(3); var = inputs.at(4); return {std::make_shared( - epsilon, scale, bias, x, mean, var)}; + x, scale, bias, mean, var, epsilon)}; } return { - std::make_shared(epsilon, scale, bias, x)}; + std::make_shared(x, scale, bias, epsilon)}; } } // namespace set_1 From 476c063447e71d286634b4d18e73e5dd263f96d5 Mon Sep 17 00:00:00 2001 From: Scott Cyphers Date: Tue, 13 Nov 2018 16:42:33 -0800 Subject: [PATCH 08/10] style --- python/pyngraph/ops/batch_norm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyngraph/ops/batch_norm.cpp b/python/pyngraph/ops/batch_norm.cpp index df600f0b173..2e7e32d97bd 100644 --- a/python/pyngraph/ops/batch_norm.cpp +++ b/python/pyngraph/ops/batch_norm.cpp @@ -32,7 +32,7 @@ void regclass_pyngraph_op_BatchNormTraining(py::module m) "ngraph.impl.op.BatchNormTraining wraps ngraph::op::BatchNormTraining"; batch_norm_training.def(py::init&, const std::shared_ptr&, - const std::shared_ptr&, + const std::shared_ptr&, double>()); } From c1ef39b0516069e5cf95cef026d58f73e00d0f4c Mon Sep 17 00:00:00 2001 
From: Scott Cyphers Date: Tue, 13 Nov 2018 17:23:20 -0800 Subject: [PATCH 09/10] Revert python batch_norm args order --- python/ngraph/ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ngraph/ops.py b/python/ngraph/ops.py index 9c08b14bd0b..eae9664055c 100644 --- a/python/ngraph/ops.py +++ b/python/ngraph/ops.py @@ -913,13 +913,13 @@ def reverse(node, reversed_axes, name=None): # type: (Node, List[int], str) -> @nameable_op -def batch_norm(data, # type: Node +def batch_norm(eps=1e-5, # type: float gamma, # type: Node beta, # type: Node + data, # type: Node mean=None, # type: Node variance=None, # type: Node name=None, # type: str - eps=1e-5, # type: float ): # type: (...) -> Node """Return batch normalization node.""" From dc119a975e8796087f0c59db00591f865f6e96ed Mon Sep 17 00:00:00 2001 From: Scott Cyphers Date: Tue, 13 Nov 2018 17:45:54 -0800 Subject: [PATCH 10/10] No default on required op --- python/ngraph/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ngraph/ops.py b/python/ngraph/ops.py index eae9664055c..1cab4de7179 100644 --- a/python/ngraph/ops.py +++ b/python/ngraph/ops.py @@ -913,7 +913,7 @@ def reverse(node, reversed_axes, name=None): # type: (Node, List[int], str) -> @nameable_op -def batch_norm(eps=1e-5, # type: float +def batch_norm(eps, # type: float gamma, # type: Node beta, # type: Node data, # type: Node