From db958211d81f9bcc0f3b56eca477589b54b15c77 Mon Sep 17 00:00:00 2001
From: pradeep <pradeep@arrayfire.com>
Date: Mon, 9 Mar 2020 13:07:40 +0530
Subject: [PATCH 1/6] Bump up crate version to 3.7.0

- Update arrayfire submodule to v3.7.0 tag
- Update upstream arrayfire version in ci jobs
---
 .github/workflows/ci.yml | 2 +-
 Cargo.toml               | 2 +-
 arrayfire                | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 133b43ee7..f2512e920 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -13,7 +13,7 @@ jobs:
         name: Build and Test Wrapper
         runs-on: ubuntu-18.04
         env:
-            AF_VER: 3.6.4
+            AF_VER: 3.7.0
         steps:
             - name: Checkout Repository
               uses: actions/checkout@master
diff --git a/Cargo.toml b/Cargo.toml
index fcac08140..c104ea30f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,7 +2,7 @@
 edition       = "2018"
 name          = "arrayfire"
 description   = "ArrayFire is a high performance software library for parallel computing with an easy-to-use API. Its array based function set makes parallel programming simple. ArrayFire's multiple backends (CUDA, OpenCL and native CPU) make it platform independent and highly portable. A few lines of code in ArrayFire can replace dozens of lines of parallel computing code, saving you valuable time and lowering development costs. This crate provides Rust bindings for ArrayFire library."
-version       = "3.6.3"
+version       = "3.7.0"
 documentation = "http://arrayfire.github.io/arrayfire-rust/arrayfire/index.html"
 homepage      = "https://github.com/arrayfire/arrayfire"
 repository    = "https://github.com/arrayfire/arrayfire-rust"
diff --git a/arrayfire b/arrayfire
index b443e146b..fbea2aeb6 160000
--- a/arrayfire
+++ b/arrayfire
@@ -1 +1 @@
-Subproject commit b443e146b6747c73caaaae4d65dd6b3a32936745
+Subproject commit fbea2aeb6f7f2d277dcb0ab425a77bb18ed22291

From d816d4bb92daf21e9564161b98ebd285ddd08a6c Mon Sep 17 00:00:00 2001
From: pradeep <pradeep@arrayfire.com>
Date: Mon, 9 Mar 2020 12:35:12 +0530
Subject: [PATCH 2/6] Link to book on gh-pages so that docs.rs works

---
 src/lib.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 228e3ccd6..983f1c7fa 100755
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -19,8 +19,8 @@
 //! are patch/fix updates for `arrayfire-rust` & `ArrayFire` respectively,
 //! and they don't need to match.
 //!
-//! Please go through our [tutorials](./book/index.html) book for more explanations on how to
-//! use ArrayFire to speedup your code.
+//! Please go through our [tutorials](http://arrayfire.org/arrayfire-rust/arrayfire/book/index.html)
+//! book for more explanations on how to use ArrayFire to speedup your code.
 
 #![doc(
     html_logo_url = "http://www.arrayfire.com/logos/arrayfire_logo_symbol.png",

From 3e3f3a820aeec870520a80cd5f1814416accd8b7 Mon Sep 17 00:00:00 2001
From: pradeep <pradeep@arrayfire.com>
Date: Fri, 13 Mar 2020 12:53:05 +0530
Subject: [PATCH 3/6] Fix product functions output Array type

For boolean/char inputs to ArrayFire, the output of the product operation
is char; for all other inputs it is the same as the AggregateOutType alias.
---
 src/algorithm/mod.rs |  6 +++---
 src/util.rs          | 16 +++++++++++++++-
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/algorithm/mod.rs b/src/algorithm/mod.rs
index 26d7192a1..32dc694a4 100644
--- a/src/algorithm/mod.rs
+++ b/src/algorithm/mod.rs
@@ -143,7 +143,7 @@ dim_reduce_func_def!(
     ",
     product,
     af_product,
-    T::AggregateOutType
+    T::ProductOutType
 );
 
 dim_reduce_func_def!(
@@ -440,10 +440,10 @@ where
 /// # Return Values
 ///
 /// Array that is reduced along given dimension via multiplication operation
-pub fn product_nan<T>(input: &Array<T>, dim: i32, nanval: f64) -> Array<T::AggregateOutType>
+pub fn product_nan<T>(input: &Array<T>, dim: i32, nanval: f64) -> Array<T::ProductOutType>
 where
     T: HasAfEnum,
-    T::AggregateOutType: HasAfEnum,
+    T::ProductOutType: HasAfEnum,
 {
     let mut temp: i64 = 0;
     unsafe {
diff --git a/src/util.rs b/src/util.rs
index bfb986fd1..e2db215b1 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -182,13 +182,15 @@ pub trait HasAfEnum {
     /// aggregation of set of values for a given input type. Aggregate type
     /// alias points to below types for given input types:
     /// - `Self` for input types: `Complex<64>`, `Complex<f32>`, `f64`, `f32`, `i64`, `u64`
-    /// - `f32` for input types: `bool`
+    /// - `u32` for input types: `bool`
     /// - `u32` for input types: `u8`
     /// - `i32` for input types: `i16`
     /// - `u32` for input types: `u16`
     /// - `i32` for input types: `i32`
     /// - `u32` for input types: `u32`
     type AggregateOutType;
+    /// Output type of product reductions; matches `AggregateOutType` except for `bool` (b8) input
+    type ProductOutType;
     /// This type alias points to the output type for given input type of
     /// sobel filter operation. Sobel filter output alias points to below
     /// types for given input types:
@@ -211,6 +213,7 @@ impl HasAfEnum for Complex<f32> {
     type ComplexOutType = Self;
     type MeanOutType = Self;
     type AggregateOutType = Self;
+    type ProductOutType = Self;
     type SobelOutType = Self;
 
     fn get_af_dtype() -> DType {
@@ -226,6 +229,7 @@ impl HasAfEnum for Complex<f64> {
     type ComplexOutType = Self;
     type MeanOutType = Self;
     type AggregateOutType = Self;
+    type ProductOutType = Self;
     type SobelOutType = Self;
 
     fn get_af_dtype() -> DType {
@@ -241,6 +245,7 @@ impl HasAfEnum for f32 {
     type ComplexOutType = Complex<f32>;
     type MeanOutType = Self;
     type AggregateOutType = Self;
+    type ProductOutType = Self;
     type SobelOutType = Self;
 
     fn get_af_dtype() -> DType {
@@ -256,6 +261,7 @@ impl HasAfEnum for f64 {
     type ComplexOutType = Complex<f64>;
     type MeanOutType = Self;
     type AggregateOutType = Self;
+    type ProductOutType = Self;
     type SobelOutType = Self;
 
     fn get_af_dtype() -> DType {
@@ -271,6 +277,7 @@ impl HasAfEnum for bool {
     type ComplexOutType = Complex<f32>;
     type MeanOutType = f32;
     type AggregateOutType = u32;
+    type ProductOutType = bool;
     type SobelOutType = i32;
 
     fn get_af_dtype() -> DType {
@@ -286,6 +293,7 @@ impl HasAfEnum for u8 {
     type ComplexOutType = Complex<f32>;
     type MeanOutType = f32;
     type AggregateOutType = u32;
+    type ProductOutType = u32;
     type SobelOutType = i32;
 
     fn get_af_dtype() -> DType {
@@ -301,6 +309,7 @@ impl HasAfEnum for i16 {
     type ComplexOutType = Complex<f32>;
     type MeanOutType = f32;
     type AggregateOutType = i32;
+    type ProductOutType = i32;
     type SobelOutType = i32;
 
     fn get_af_dtype() -> DType {
@@ -316,6 +325,7 @@ impl HasAfEnum for u16 {
     type ComplexOutType = Complex<f32>;
     type MeanOutType = f32;
     type AggregateOutType = u32;
+    type ProductOutType = u32;
     type SobelOutType = i32;
 
     fn get_af_dtype() -> DType {
@@ -331,6 +341,7 @@ impl HasAfEnum for i32 {
     type ComplexOutType = Complex<f32>;
     type MeanOutType = f32;
     type AggregateOutType = i32;
+    type ProductOutType = i32;
     type SobelOutType = i32;
 
     fn get_af_dtype() -> DType {
@@ -346,6 +357,7 @@ impl HasAfEnum for u32 {
     type ComplexOutType = Complex<f32>;
     type MeanOutType = f32;
     type AggregateOutType = u32;
+    type ProductOutType = u32;
     type SobelOutType = i32;
 
     fn get_af_dtype() -> DType {
@@ -361,6 +373,7 @@ impl HasAfEnum for i64 {
     type ComplexOutType = Complex<f64>;
     type MeanOutType = f64;
     type AggregateOutType = Self;
+    type ProductOutType = Self;
     type SobelOutType = i64;
 
     fn get_af_dtype() -> DType {
@@ -376,6 +389,7 @@ impl HasAfEnum for u64 {
     type ComplexOutType = Complex<f64>;
     type MeanOutType = f64;
     type AggregateOutType = Self;
+    type ProductOutType = Self;
     type SobelOutType = i64;
 
     fn get_af_dtype() -> DType {

From 07d302be0a7a4f52bc20f08f725227a62d016e5a Mon Sep 17 00:00:00 2001
From: pradeep <pradeep@arrayfire.com>
Date: Fri, 13 Mar 2020 12:56:51 +0530
Subject: [PATCH 4/6] Avoid generating dependency crates documentation

---
 scripts/generate_documentation.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/generate_documentation.sh b/scripts/generate_documentation.sh
index 69691f410..8c8f40cbf 100755
--- a/scripts/generate_documentation.sh
+++ b/scripts/generate_documentation.sh
@@ -2,6 +2,6 @@
 
 # this script meant to be run from the root of arrayfire-rust
 
-cargo rustdoc -- --html-in-header ./scripts/mathjax.script
+cargo rustdoc -p arrayfire -- --html-in-header ./scripts/mathjax.script
 
 mdbook build tutorials-book && cp -r tutorials-book/book ./target/doc/arrayfire/

From 0e707f20a7df4ac350895a10d6421942b76d0f9d Mon Sep 17 00:00:00 2001
From: pradeep <pradeep@arrayfire.com>
Date: Fri, 13 Mar 2020 12:57:13 +0530
Subject: [PATCH 5/6] Enable inline math equation delimiters in mathjax script
 config

---
 scripts/mathjax.script | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/scripts/mathjax.script b/scripts/mathjax.script
index 176a75dcc..a3050b32a 100644
--- a/scripts/mathjax.script
+++ b/scripts/mathjax.script
@@ -1,3 +1,12 @@
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    tex2jax: {
+      inlineMath: [ ['$','$'], ["\\(","\\)"] ],
+      processEscapes: true
+    }
+  });
+</script>
+
 <script type="text/javascript" async
   src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.4/MathJax.js?config=TeX-MML-AM_CHTML">
 </script>

From a313553583f032134ba85489e8f29f840cfbf516 Mon Sep 17 00:00:00 2001
From: pradeep <pradeep@arrayfire.com>
Date: Fri, 13 Mar 2020 13:05:20 +0530
Subject: [PATCH 6/6] Update API to reflect ArrayFire 3.7.0 release

---
 Cargo.toml             |   9 +-
 examples/conway.rs     |   2 +-
 examples/using_half.rs |  15 +++
 src/algorithm/mod.rs   | 260 +++++++++++++++++++++++++++++++++++-
 src/arith/mod.rs       |   7 +
 src/array.rs           |  24 +++-
 src/blas/mod.rs        | 143 +++++++++++++++++++-
 src/data/mod.rs        |  46 ++++++-
 src/defines.rs         |  66 ++++++++++
 src/error.rs           |   4 +-
 src/event.rs           |  79 +++++++++++
 src/graphics.rs        |  40 ++++++
 src/image/mod.rs       | 212 ++++++++++++++++++++++++++++++
 src/lapack/mod.rs      |  35 +++++
 src/lib.rs             |   9 ++
 src/machinelearning.rs | 144 ++++++++++++++++++++
 src/signal/mod.rs      | 292 ++++++++++++++++++++++++++++++++++++++++-
 src/statistics/mod.rs  |  54 +++++++-
 src/util.rs            |  41 ++++++
 19 files changed, 1468 insertions(+), 14 deletions(-)
 create mode 100644 examples/using_half.rs
 create mode 100644 src/event.rs
 create mode 100644 src/machinelearning.rs

diff --git a/Cargo.toml b/Cargo.toml
index c104ea30f..1506cebb2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -27,6 +27,7 @@ indexing = []
 graphics = []
 image = []
 lapack = []
+machine_learning = []
 macros = []
 random = []
 signal = []
@@ -34,15 +35,17 @@ sparse = []
 statistics = []
 vision = []
 default = ["algorithm", "arithmetic", "blas", "data", "indexing", "graphics", "image", "lapack",
-"macros", "random", "signal", "sparse", "statistics", "vision"]
+"machine_learning", "macros", "random", "signal", "sparse", "statistics", "vision"]
 
 [dependencies]
 libc = "0.2"
 num  = "0.2"
 lazy_static = "1.0"
+half = "1.5.0"
 
 [dev-dependencies]
 float-cmp = "0.6.0"
+half = "1.5.0"
 
 [build-dependencies]
 serde_json = "1.0"
@@ -85,3 +88,7 @@ path = "examples/conway.rs"
 [[example]]
 name = "fft"
 path = "examples/fft.rs"
+
+[[example]]
+name = "using_half"
+path = "examples/using_half.rs"
diff --git a/examples/conway.rs b/examples/conway.rs
index 75b480d0e..8762a8a05 100644
--- a/examples/conway.rs
+++ b/examples/conway.rs
@@ -7,7 +7,7 @@ fn main() {
 }
 
 fn normalise(a: &Array<f32>) -> Array<f32> {
-    (a / (max_all(&abs(a)).0 as f32))
+    a / (max_all(&abs(a)).0 as f32)
 }
 
 fn conways_game_of_life() {
diff --git a/examples/using_half.rs b/examples/using_half.rs
new file mode 100644
index 000000000..fd45ddc49
--- /dev/null
+++ b/examples/using_half.rs
@@ -0,0 +1,15 @@
+use arrayfire::*;
+use half::f16;
+
+fn main() {
+    set_device(0);
+    info();
+
+    let values: Vec<_> = (1u8..101).map(f32::from).collect();
+
+    let half_values = values.iter().map(|&x| f16::from_f32(x)).collect::<Vec<_>>();
+
+    let hvals = Array::new(&half_values, Dim4::new(&[10, 10, 1, 1]));
+
+    print(&hvals);
+}
diff --git a/src/algorithm/mod.rs b/src/algorithm/mod.rs
index 32dc694a4..0bcc89e91 100644
--- a/src/algorithm/mod.rs
+++ b/src/algorithm/mod.rs
@@ -5,7 +5,7 @@ use crate::array::Array;
 use crate::defines::{AfError, BinaryOp};
 use crate::error::HANDLE_ERROR;
 use crate::util::{AfArray, MutAfArray, MutDouble, MutUint};
-use crate::util::{HasAfEnum, RealNumber, Scanable};
+use crate::util::{HasAfEnum, RealNumber, ReduceByKeyInput, Scanable};
 
 #[allow(dead_code)]
 extern "C" {
@@ -59,6 +59,71 @@ extern "C" {
         op: c_uint,
         inclusive: c_int,
     ) -> c_int;
+    fn af_all_true_by_key(
+        keys_out: MutAfArray,
+        vals_out: MutAfArray,
+        keys: AfArray,
+        vals: AfArray,
+        dim: c_int,
+    ) -> c_int;
+    fn af_any_true_by_key(
+        keys_out: MutAfArray,
+        vals_out: MutAfArray,
+        keys: AfArray,
+        vals: AfArray,
+        dim: c_int,
+    ) -> c_int;
+    fn af_count_by_key(
+        keys_out: MutAfArray,
+        vals_out: MutAfArray,
+        keys: AfArray,
+        vals: AfArray,
+        dim: c_int,
+    ) -> c_int;
+    fn af_max_by_key(
+        keys_out: MutAfArray,
+        vals_out: MutAfArray,
+        keys: AfArray,
+        vals: AfArray,
+        dim: c_int,
+    ) -> c_int;
+    fn af_min_by_key(
+        keys_out: MutAfArray,
+        vals_out: MutAfArray,
+        keys: AfArray,
+        vals: AfArray,
+        dim: c_int,
+    ) -> c_int;
+    fn af_product_by_key(
+        keys_out: MutAfArray,
+        vals_out: MutAfArray,
+        keys: AfArray,
+        vals: AfArray,
+        dim: c_int,
+    ) -> c_int;
+    fn af_product_by_key_nan(
+        keys_out: MutAfArray,
+        vals_out: MutAfArray,
+        keys: AfArray,
+        vals: AfArray,
+        dim: c_int,
+        nan_val: c_double,
+    ) -> c_int;
+    fn af_sum_by_key(
+        keys_out: MutAfArray,
+        vals_out: MutAfArray,
+        keys: AfArray,
+        vals: AfArray,
+        dim: c_int,
+    ) -> c_int;
+    fn af_sum_by_key_nan(
+        keys_out: MutAfArray,
+        vals_out: MutAfArray,
+        keys: AfArray,
+        vals: AfArray,
+        dim: c_int,
+        nan_val: c_double,
+    ) -> c_int;
 }
 
 macro_rules! dim_reduce_func_def {
@@ -527,7 +592,8 @@ all_reduce_func_def!(
     let dims = Dim4::new(&[5, 5, 1, 1]);
     let a = randu::<f32>(dims);
     print(&a);
-    println!(\"Result : {:?}\", product_all(&a));
+    let res = product_all(&a);
+    println!(\"Result : {:?}\", res);
     ```
     ",
     product_all,
@@ -1137,3 +1203,193 @@ where
     }
     temp.into()
 }
+
+macro_rules! dim_reduce_by_key_func_def {
+    ($brief_str: expr, $ex_str: expr, $fn_name: ident, $ffi_name: ident, $out_type: ty) => {
+        #[doc=$brief_str]
+        /// # Parameters
+        ///
+        /// - `keys` - key Array
+        /// - `vals` - value Array
+        /// - `dim`   - Dimension along which the input Array is reduced
+        ///
+        /// # Return Values
+        ///
+        /// Tuple of Arrays, with output keys and values after reduction
+        ///
+        #[doc=$ex_str]
+        pub fn $fn_name<KeyType, ValueType>(keys: &Array<KeyType>, vals: &Array<ValueType>,
+                                            dim: i32
+        ) -> (Array<KeyType>, Array<$out_type>)
+        where
+            KeyType: ReduceByKeyInput,
+            ValueType: HasAfEnum,
+            $out_type: HasAfEnum,
+        {
+            let mut out_keys: i64 = 0;
+            let mut out_vals: i64 = 0;
+            unsafe {
+                let err_val = $ffi_name(
+                    &mut out_keys as MutAfArray,
+                    &mut out_vals as MutAfArray,
+                    keys.get() as AfArray,
+                    vals.get() as AfArray,
+                    dim as c_int,
+                );
+                HANDLE_ERROR(AfError::from(err_val));
+            }
+            (out_keys.into(), out_vals.into())
+        }
+    };
+}
+
+dim_reduce_by_key_func_def!(
+    "
+    Key based AND of elements along a given dimension
+
+    All positive non-zero values are considered true, while negative and zero
+    values are considered as false.
+    ",
+    "
+    # Examples
+    ```rust
+    use arrayfire::{Dim4, print, randu, all_true_by_key};
+    let dims = Dim4::new(&[5, 3, 1, 1]);
+    let vals = randu::<f32>(dims);
+    let keys = randu::<u32>(Dim4::new(&[5, 1, 1, 1]));
+    print(&vals);
+    print(&keys);
+    let (out_keys, out_vals) = all_true_by_key(&keys, &vals, 0);
+    print(&out_keys);
+    print(&out_vals);
+    ```
+    ",
+    all_true_by_key,
+    af_all_true_by_key,
+    ValueType::AggregateOutType
+);
+
+dim_reduce_by_key_func_def!(
+    "
+    Key based OR of elements along a given dimension
+
+    All positive non-zero values are considered true, while negative and zero
+    values are considered as false.
+    ",
+    "
+    # Examples
+    ```rust
+    use arrayfire::{Dim4, print, randu, any_true_by_key};
+    let dims = Dim4::new(&[5, 3, 1, 1]);
+    let vals = randu::<f32>(dims);
+    let keys = randu::<u32>(Dim4::new(&[5, 1, 1, 1]));
+    print(&vals);
+    print(&keys);
+    let (out_keys, out_vals) = any_true_by_key(&keys, &vals, 0);
+    print(&out_keys);
+    print(&out_vals);
+    ```
+    ",
+    any_true_by_key,
+    af_any_true_by_key,
+    ValueType::AggregateOutType
+);
+
+dim_reduce_by_key_func_def!(
+    "Find total count of elements with similar keys along a given dimension",
+    "",
+    count_by_key,
+    af_count_by_key,
+    ValueType::AggregateOutType
+);
+
+dim_reduce_by_key_func_def!(
+    "Find maximum among values of similar keys along a given dimension",
+    "",
+    max_by_key,
+    af_max_by_key,
+    ValueType::AggregateOutType
+);
+
+dim_reduce_by_key_func_def!(
+    "Find minimum among values of similar keys along a given dimension",
+    "",
+    min_by_key,
+    af_min_by_key,
+    ValueType::AggregateOutType
+);
+
+dim_reduce_by_key_func_def!(
+    "Find product of all values with similar keys along a given dimension",
+    "",
+    product_by_key,
+    af_product_by_key,
+    ValueType::ProductOutType
+);
+
+dim_reduce_by_key_func_def!(
+    "Find sum of all values with similar keys along a given dimension",
+    "",
+    sum_by_key,
+    af_sum_by_key,
+    ValueType::AggregateOutType
+);
+
+macro_rules! dim_reduce_by_key_nan_func_def {
+    ($brief_str: expr, $ex_str: expr, $fn_name: ident, $ffi_name: ident, $out_type: ty) => {
+        #[doc=$brief_str]
+        ///
+        /// This version of the reduction can replace all NaN values in the input
+        /// with a user provided value before performing the reduction operation.
+        /// # Parameters
+        ///
+        /// - `keys` - key Array
+        /// - `vals` - value Array
+        /// - `dim`   - Dimension along which the input Array is reduced
+        ///
+        /// # Return Values
+        ///
+        /// Tuple of Arrays, with output keys and values after reduction
+        ///
+        #[doc=$ex_str]
+        pub fn $fn_name<KeyType, ValueType>(keys: &Array<KeyType>, vals: &Array<ValueType>,
+                                            dim: i32, replace_value: f64
+        ) -> (Array<KeyType>, Array<$out_type>)
+        where
+            KeyType: ReduceByKeyInput,
+            ValueType: HasAfEnum,
+            $out_type: HasAfEnum,
+        {
+            let mut out_keys: i64 = 0;
+            let mut out_vals: i64 = 0;
+            unsafe {
+                let err_val = $ffi_name(
+                    &mut out_keys as MutAfArray,
+                    &mut out_vals as MutAfArray,
+                    keys.get() as AfArray,
+                    vals.get() as AfArray,
+                    dim as c_int,
+                    replace_value as c_double,
+                );
+                HANDLE_ERROR(AfError::from(err_val));
+            }
+            (out_keys.into(), out_vals.into())
+        }
+    };
+}
+
+dim_reduce_by_key_nan_func_def!(
+    "Compute sum of all values with similar keys along a given dimension",
+    "",
+    sum_by_key_nan,
+    af_sum_by_key_nan,
+    ValueType::AggregateOutType
+);
+
+dim_reduce_by_key_nan_func_def!(
+    "Compute product of all values with similar keys along a given dimension",
+    "",
+    product_by_key_nan,
+    af_product_by_key_nan,
+    ValueType::ProductOutType
+);
diff --git a/src/arith/mod.rs b/src/arith/mod.rs
index 25eb1ee12..fe7e1ccca 100644
--- a/src/arith/mod.rs
+++ b/src/arith/mod.rs
@@ -85,6 +85,7 @@ extern "C" {
     fn af_log10(out: MutAfArray, arr: AfArray) -> c_int;
     fn af_log2(out: MutAfArray, arr: AfArray) -> c_int;
     fn af_sqrt(out: MutAfArray, arr: AfArray) -> c_int;
+    fn af_rsqrt(out: MutAfArray, arr: AfArray) -> c_int;
     fn af_cbrt(out: MutAfArray, arr: AfArray) -> c_int;
     fn af_factorial(out: MutAfArray, arr: AfArray) -> c_int;
     fn af_tgamma(out: MutAfArray, arr: AfArray) -> c_int;
@@ -199,6 +200,12 @@ unary_func!("Compute the natural logarithm", log, af_log, UnaryOutType);
 unary_func!("Compute sin", sin, af_sin, UnaryOutType);
 unary_func!("Compute sinh", sinh, af_sinh, UnaryOutType);
 unary_func!("Compute the square root", sqrt, af_sqrt, UnaryOutType);
+unary_func!(
+    "Compute the reciprocal square root",
+    rsqrt,
+    af_rsqrt,
+    UnaryOutType
+);
 unary_func!("Compute tan", tan, af_tan, UnaryOutType);
 unary_func!("Compute tanh", tanh, af_tanh, UnaryOutType);
 
diff --git a/src/array.rs b/src/array.rs
index db2b55a15..842ea6247 100644
--- a/src/array.rs
+++ b/src/array.rs
@@ -166,12 +166,29 @@ where
     ///
     /// # Examples
     ///
+    /// An example of creating an Array from f32 array
+    ///
     /// ```rust
     /// use arrayfire::{Array, Dim4, print};
     /// let values: [f32; 3] = [1.0, 2.0, 3.0];
     /// let indices = Array::new(&values, Dim4::new(&[3, 1, 1, 1]));
     /// print(&indices);
     /// ```
+    /// An example of creating an Array from half::f16 array
+    ///
+    /// ```rust
+    /// use arrayfire::{Array, Dim4, print};
+    /// use half::f16;
+    ///
+    /// let values: [f32; 3] = [1.0, 2.0, 3.0];
+    ///
+    /// let half_values = values.iter().map(|&x| f16::from_f32(x)).collect::<Vec<_>>();
+    ///
+    /// let hvals = Array::new(&half_values, Dim4::new(&[3, 1, 1, 1]));
+    ///
+    /// print(&hvals);
+    /// ```
+    ///
     #[allow(unused_mut)]
     pub fn new(slice: &[T], dims: Dim4) -> Self {
         let aftype = T::get_af_dtype();
@@ -218,7 +235,7 @@ where
     ///
     /// ```rust
     /// use arrayfire::{Array, Dim4};
-    /// let garbageVals = Array::<f32>::new_empty(Dim4::new(&[3, 1, 1, 1]));
+    /// let garbage_vals = Array::<f32>::new_empty(Dim4::new(&[3, 1, 1, 1]));
     /// ```
     #[allow(unused_mut)]
     pub fn new_empty(dims: Dim4) -> Self {
@@ -353,6 +370,11 @@ where
         self.handle
     }
 
+    /// Sets the native FFI handle for Rust object `Array`
+    pub fn set(&mut self, handle: i64) {
+        self.handle = handle;
+    }
+
     /// Copies the data from the Array to the mutable slice `data`
     pub fn host<O: HasAfEnum>(&self, data: &mut [O]) {
         if data.len() != self.elements() {
diff --git a/src/blas/mod.rs b/src/blas/mod.rs
index e7796fd45..a607f4f52 100644
--- a/src/blas/mod.rs
+++ b/src/blas/mod.rs
@@ -1,15 +1,25 @@
 extern crate libc;
 
-use self::libc::{c_int, c_uint};
+use self::libc::{c_int, c_uint, c_void};
 use crate::array::Array;
-use crate::defines::AfError;
-use crate::defines::MatProp;
+use crate::defines::{AfError, CublasMathMode, MatProp};
 use crate::error::HANDLE_ERROR;
 use crate::util::{to_u32, AfArray, MutAfArray};
 use crate::util::{FloatingPoint, HasAfEnum};
+use std::vec::Vec;
 
 #[allow(dead_code)]
 extern "C" {
+    fn af_gemm(
+        out: MutAfArray,
+        optlhs: c_uint,
+        optrhs: c_uint,
+        alpha: *const c_void,
+        lhs: AfArray,
+        rhs: AfArray,
+        beta: *const c_void,
+    ) -> c_int;
+
     fn af_matmul(
         out: MutAfArray,
         lhs: AfArray,
@@ -23,6 +33,116 @@ extern "C" {
 
     fn af_transpose(out: MutAfArray, arr: AfArray, conjugate: c_int) -> c_int;
     fn af_transpose_inplace(arr: AfArray, conjugate: c_int) -> c_int;
+
+    fn afcu_cublasSetMathMode(mode: c_int) -> c_int;
+}
+
+/// BLAS general matrix multiply (GEMM) of two Array objects
+///
+///
+/// This provides a general interface to the BLAS level 3 general matrix multiply (GEMM),
+/// which is generally defined as:
+///
+/// \begin{equation}
+///     C = \alpha * opA(A)opB(B) + \beta * C
+/// \end{equation}
+///
+///   where $\alpha$ (**alpha**) and $\beta$ (**beta**) are both scalars; $A$ and $B$ are the matrix
+///   multiply operands; and $opA$ and $opB$ are noop
+///   (if `optlhs`/`optrhs` is [MatProp::NONE](./enum.MatProp.html)) or transpose
+///   (if `optlhs`/`optrhs` is [MatProp::TRANS](./enum.MatProp.html)) operations on $A$ or $B$ before the
+///   actual GEMM operation. Batched GEMM is supported if at least one of $A$ or $B$ has more than
+///   two dimensions (see [af::matmul](http://arrayfire.org/docs/group__blas__func__matmul.htm#ga63306b6ed967bd1055086db862fe885b)
+///   for more details on broadcasting). However, only one **alpha** and one **beta** can be used
+///   for all of the batched matrix operands.
+///
+///   The `output` Array can be used both as an input and output. An allocation will be performed
+///   if you pass an empty Array (i.e. `let c: Array<f32> = (0 as i64).into();`). If a valid Array
+///   is passed as $C$, the operation will be performed on that Array itself. The C Array must be
+///   the correct type and shape; otherwise, an error will be thrown.
+///
+///   Note: Passing an Array that has not been initialized to the C array
+///   will cause undefined behavior.
+///
+/// # Examples
+///
+/// Given below is an example of using gemm API with existing Arrays
+///
+/// ```rust
+/// use arrayfire::{Array, Dim4, print, randu, gemm};
+///
+/// let dims = Dim4::new(&[5, 5, 1, 1]);
+///
+/// let alpha = vec![1.0 as f32];
+/// let  beta = vec![2.0 as f32];
+///
+/// let lhs = randu::<f32>(dims);
+/// let rhs = randu::<f32>(dims);
+///
+/// let mut result = Array::new_empty(dims);
+/// gemm(&mut result, arrayfire::MatProp::NONE, arrayfire::MatProp::NONE,
+///      alpha, &lhs, &rhs, beta);
+/// ```
+///
+/// If you don't have an existing Array, you can also use gemm in the following fashion.
+/// However, if there is no existing Array that you need to fill and your use case doesn't
+/// deal with alpha and beta from gemm equation, it is recommended to use
+/// [matmul](./fn.matmul.html) for more terse code.
+///
+/// ```rust
+/// use arrayfire::{Array, Dim4, print, randu, gemm};
+///
+/// let dims = Dim4::new(&[5, 5, 1, 1]);
+///
+/// let alpha = vec![1.0 as f32];
+/// let  beta = vec![2.0 as f32];
+///
+/// let lhs = randu::<f32>(dims);
+/// let rhs = randu::<f32>(dims);
+///
+/// let mut result: Array::<f32> = (0 as i64).into();
+///
+/// gemm(&mut result, arrayfire::MatProp::NONE, arrayfire::MatProp::NONE,
+///      alpha, &lhs, &rhs, beta);
+/// ```
+///
+/// # Parameters
+///
+/// - `optlhs` - Transpose left hand side before the function is performed, uses one of the values of [MatProp](./enum.MatProp.html)
+/// - `optrhs` - Transpose right hand side before the function is performed, uses one of the values of [MatProp](./enum.MatProp.html)
+/// - `alpha` is the scalar $\alpha$ that scales the matrix product
+/// - `lhs` is the Array on left hand side
+/// - `rhs` is the Array on right hand side
+/// - `beta` is the scalar $\beta$ that scales the existing values of `output`
+///
+/// # Return Values
+///
+/// None; the result of the gemm operation is stored in `output`
+pub fn gemm<T>(
+    output: &mut Array<T>,
+    optlhs: MatProp,
+    optrhs: MatProp,
+    alpha: Vec<T>,
+    lhs: &Array<T>,
+    rhs: &Array<T>,
+    beta: Vec<T>,
+) where
+    T: HasAfEnum + FloatingPoint,
+{
+    let mut out = output.get();
+    unsafe {
+        let err_val = af_gemm(
+            &mut out as MutAfArray,
+            to_u32(optlhs) as c_uint,
+            to_u32(optrhs) as c_uint,
+            alpha.as_ptr() as *const c_void,
+            lhs.get() as AfArray,
+            rhs.get() as AfArray,
+            beta.as_ptr() as *const c_void,
+        );
+        HANDLE_ERROR(AfError::from(err_val));
+    }
+    output.set(out);
 }
 
 /// Matrix multiple of two Arrays
@@ -128,3 +248,20 @@ pub fn transpose_inplace<T: HasAfEnum>(arr: &mut Array<T>, conjugate: bool) {
         HANDLE_ERROR(AfError::from(err_val));
     }
 }
+
+/// Sets the cuBLAS math mode for the internal handle.
+///
+/// See the cuBLAS documentation for additional details
+///
+/// # Parameters
+///
+/// - `mode` takes a value of [CublasMathMode](./enum.CublasMathMode.html) enum
+pub fn set_cublas_mode(mode: CublasMathMode) {
+    unsafe {
+        afcu_cublasSetMathMode(mode as c_int);
+        //let err_val = afcu_cublasSetMathMode(mode as c_int);
+        // FIXME: unclear whether a failure here should be raised as an error,
+        // since the program state is not invalidated by it.
+        // HANDLE_ERROR(AfError::from(err_val));
+    }
+}
diff --git a/src/data/mod.rs b/src/data/mod.rs
index 67e9b3d49..32aade7ba 100644
--- a/src/data/mod.rs
+++ b/src/data/mod.rs
@@ -4,7 +4,7 @@ extern crate num;
 use self::libc::{c_double, c_int, c_uint};
 use self::num::Complex;
 use crate::array::Array;
-use crate::defines::AfError;
+use crate::defines::{AfError, BorderType};
 use crate::dim4::Dim4;
 use crate::error::HANDLE_ERROR;
 use crate::util::{AfArray, DimT, HasAfEnum, Intl, MutAfArray, Uintl};
@@ -73,6 +73,16 @@ extern "C" {
 
     fn af_replace(a: MutAfArray, cond: AfArray, b: AfArray) -> c_int;
     fn af_replace_scalar(a: MutAfArray, cond: AfArray, b: c_double) -> c_int;
+
+    fn af_pad(
+        out: MutAfArray,
+        input: AfArray,
+        begin_ndims: c_uint,
+        begin_dims: *const DimT,
+        end_ndims: c_uint,
+        end_dims: *const DimT,
+        pad_fill_type: c_int,
+    ) -> c_int;
 }
 
 /// Type Trait to generate a constant [Array](./struct.Array.html) of given size
@@ -918,3 +928,37 @@ where
         HANDLE_ERROR(AfError::from(err_val));
     }
 }
+
+/// Pad input Array along borders
+///
+/// # Parameters
+///
+/// - `input` is the input array to be padded
+/// - `begin` is padding size before first element along a given dimension
+/// - `end` is padding size after the last element along a given dimension
+/// - `fill_type` indicates what values should be used to fill padded regions
+///
+/// # Return Values
+///
+/// Padded Array
+pub fn pad<T: HasAfEnum>(
+    input: &Array<T>,
+    begin: Dim4,
+    end: Dim4,
+    fill_type: BorderType,
+) -> Array<T> {
+    let mut temp: i64 = 0;
+    unsafe {
+        let err_val = af_pad(
+            &mut temp as MutAfArray,
+            input.get() as AfArray,
+            begin.ndims() as c_uint,
+            begin.get().as_ptr() as *const DimT,
+            end.ndims() as c_uint,
+            end.get().as_ptr() as *const DimT,
+            fill_type as c_int,
+        );
+        HANDLE_ERROR(AfError::from(err_val));
+    }
+    temp.into()
+}
diff --git a/src/defines.rs b/src/defines.rs
index 5633f9fe2..7c7bb2b9d 100644
--- a/src/defines.rs
+++ b/src/defines.rs
@@ -136,6 +136,8 @@ pub enum DType {
     S16 = 10,
     /// 16 bit unsigned integer
     U16 = 11,
+    /// 16 bit floating point
+    F16 = 12,
 }
 
 /// Dictates the interpolation method to be used by a function
@@ -172,6 +174,12 @@ pub enum BorderType {
     ZERO = 0,
     /// Pad using mirrored values along border
     SYMMETRIC = 1,
+
+    /// Out of bound values are clamped to the edge
+    CLAMP_TO_EDGE,
+
+    /// Out of bound values are mapped to range of the dimension in cyclic fashion
+    PERIODIC,
 }
 
 /// Used by `regions` function to identify type of connectivity
@@ -496,3 +504,61 @@ pub enum TopkFn {
     /// Default option(max)
     DEFAULT = 0,
 }
+
+/// Iterative Deconvolution Algorithm
+#[repr(u32)]
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum IterativeDeconvAlgo {
+    /// Land-Weber Algorithm
+    LANDWEBER = 1,
+    /// Richardson-Lucy Algorithm
+    RICHARDSONLUCY = 2,
+    /// Default is Land-Weber algorithm
+    DEFAULT = 0,
+}
+
+/// Inverse Deconvolution Algorithm
+#[repr(u32)]
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum InverseDeconvAlgo {
+    /// Tikhonov algorithm
+    TIKHONOV = 1,
+    /// Default is Tikhonov algorithm
+    DEFAULT = 0,
+}
+
+/// Gradient mode for convolution
+#[repr(u32)]
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum ConvGradientType {
+    /// Filter Gradient
+    FILTER = 1,
+    /// Data Gradient
+    DATA = 2,
+    /// Biased Gradient
+    BIAS = 3,
+    /// Default is Data Gradient
+    DEFAULT = 0,
+}
+
+/// Bias type used for variance calculation
+#[repr(u32)]
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum VarianceBias {
+    /// Sample variance
+    SAMPLE = 1,
+    /// Population variance
+    POPULATION = 2,
+    /// Default (Population) variance
+    DEFAULT = 0,
+}
+
+/// Math mode controlling use of Tensor Cores on CUDA capable GPUs
+#[repr(u32)]
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum CublasMathMode {
+    /// To indicate use of Tensor Cores on CUDA capable GPUs
+    TENSOR_OP = 1,
+    /// Default i.e. tensor core operations will be avoided by the library
+    DEFAULT = 0,
+}
diff --git a/src/error.rs b/src/error.rs
index a544b58d0..b43fef6c9 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -60,7 +60,7 @@ lazy_static! {
 /// use arrayfire::{AfError, Callback, info, register_error_handler};
 /// use std::error::Error;
 ///
-/// fn handleError(error_code: AfError) {
+/// fn handle_error(error_code: AfError) {
 ///     match error_code {
 ///         AfError::SUCCESS => {}, /* No-op */
 ///         _ => panic!("Error message: {}", error_code.description()),
@@ -71,7 +71,7 @@ lazy_static! {
 ///     //Registering the error handler should be the first call
 ///     //before any other functions are called if your version
 ///     //of error is to be used for subsequent function calls
-///     register_error_handler(Callback::new(handleError));
+///     register_error_handler(Callback::new(handle_error));
 ///
 ///     info();
 /// }
diff --git a/src/event.rs b/src/event.rs
new file mode 100644
index 000000000..e609d9b64
--- /dev/null
+++ b/src/event.rs
@@ -0,0 +1,79 @@
+extern crate libc;
+
+use self::libc::c_int;
+use crate::defines::AfError;
+use crate::error::HANDLE_ERROR;
+use crate::util::{AfEvent, MutAfEvent};
+
+use std::default::Default;
+
+#[allow(dead_code)]
+extern "C" {
+    fn af_create_event(out: MutAfEvent) -> c_int;
+    fn af_delete_event(out: AfEvent) -> c_int;
+    fn af_mark_event(out: AfEvent) -> c_int;
+    fn af_enqueue_wait_event(out: AfEvent) -> c_int;
+    fn af_block_event(out: AfEvent) -> c_int;
+}
+
+/// RAII construct to manage ArrayFire events
+pub struct Event {
+    event_handle: i64,
+}
+
+impl Default for Event {
+    fn default() -> Self {
+        let mut temp: i64 = 0;
+        unsafe {
+            let err_val = af_create_event(&mut temp as MutAfEvent);
+            HANDLE_ERROR(AfError::from(err_val));
+        }
+        Self { event_handle: temp }
+    }
+}
+
+impl Event {
+    /// Marks the event on the active computation queue.
+    ///
+    /// If the event is enqueued/waited on later, any operations that are currently
+    /// enqueued on the event queue will be completed before any events that are
+    /// enqueued after the call to enqueue
+    pub fn mark(&self) {
+        unsafe {
+            let err_val = af_mark_event(self.event_handle as AfEvent);
+            HANDLE_ERROR(AfError::from(err_val));
+        }
+    }
+
+    /// Enqueues the event and all enqueued events on the active queue
+    ///
+    /// All operations enqueued after a call to enqueue will not be executed
+    /// until operations on the queue when mark was called are complete
+    pub fn enqueue_wait(&self) {
+        unsafe {
+            let err_val = af_enqueue_wait_event(self.event_handle as AfEvent);
+            HANDLE_ERROR(AfError::from(err_val));
+        }
+    }
+
+    /// Blocks the calling thread on events until all events on the computation
+    /// stream before mark was called are complete
+    pub fn block(&self) {
+        unsafe {
+            let err_val = af_block_event(self.event_handle as AfEvent);
+            HANDLE_ERROR(AfError::from(err_val));
+        }
+    }
+}
+
+impl Drop for Event {
+    fn drop(&mut self) {
+        unsafe {
+            let ret_val = af_delete_event(self.event_handle as AfEvent);
+            match ret_val {
+                0 => (),
+                _ => panic!("Failed to delete event resources: {}", ret_val),
+            }
+        }
+    }
+}
diff --git a/src/graphics.rs b/src/graphics.rs
index 6b3209b66..ea1d38975 100644
--- a/src/graphics.rs
+++ b/src/graphics.rs
@@ -25,6 +25,13 @@ extern "C" {
         ztitle: *const c_char,
         props: CellPtr,
     ) -> c_int;
+    fn af_set_axes_label_format(
+        wnd: WndHandle,
+        xformat: *const c_char,
+        yformat: *const c_char,
+        zformat: *const c_char,
+        props: CellPtr,
+    ) -> c_int;
     fn af_set_axes_limits_compute(
         wnd: WndHandle,
         x: AfArray,
@@ -377,6 +384,39 @@ impl Window {
         }
     }
 
+    /// Set chart axes labels formats
+    ///
+    /// Axes labels use printf style format specifiers. Default specifier for the data displayed
+    /// as labels is %4.1f. This function lets the user change this label formatting to whichever
+    /// format that fits their data range and precision.
+    ///
+    /// # Parameters
+    ///
+    /// - `xformat` is printf style format specifier for x axis
+    /// - `yformat` is printf style format specifier for y axis
+    /// - `zformat` is printf style format specifier for z axis
+    pub fn set_axes_label_formats(&mut self, xformat: String, yformat: String, zformat: String) {
+        let cprops = &Cell {
+            row: self.row,
+            col: self.col,
+            title: ptr::null(),
+            cmap: self.cmap,
+        };
+        let xstr = CString::new(xformat).unwrap();
+        let ystr = CString::new(yformat).unwrap();
+        let zstr = CString::new(zformat).unwrap();
+        unsafe {
+            let err_val = af_set_axes_label_format(
+                self.handle as WndHandle,
+                xstr.as_ptr(),
+                ystr.as_ptr(),
+                zstr.as_ptr(),
+                cprops as *const Cell as CellPtr,
+            );
+            HANDLE_ERROR(AfError::from(err_val));
+        }
+    }
+
     /// Set chart axes limits by computing limits from data
     ///
     /// In multiple view (grid) mode, setting limits will effect the chart that is currently
diff --git a/src/image/mod.rs b/src/image/mod.rs
index d7c3fbbf5..b15afd89b 100644
--- a/src/image/mod.rs
+++ b/src/image/mod.rs
@@ -4,8 +4,10 @@ use self::libc::{c_char, c_double, c_float, c_int, c_uint};
 use crate::array::Array;
 use crate::defines::{AfError, BorderType, CannyThresholdType, ColorSpace, Connectivity};
 use crate::defines::{DiffusionEq, FluxFn, InterpType, MomentType, YCCStd};
+use crate::defines::{InverseDeconvAlgo, IterativeDeconvAlgo};
 use crate::error::HANDLE_ERROR;
 use crate::util::{AfArray, DimT, MutAfArray};
+use crate::util::{ConfidenceCCInput, DeconvInput};
 use crate::util::{EdgeComputable, GrayRGBConvertible, MomentsComputable, RealFloating};
 use crate::util::{FloatingPoint, HasAfEnum, ImageFilterType, ImageNativeType, RealNumber};
 use std::ffi::CString;
@@ -191,6 +193,31 @@ extern "C" {
         fftype: c_int,
         diff_kind: c_int,
     ) -> c_int;
+    fn af_confidence_cc(
+        out: MutAfArray,
+        input: AfArray,
+        seedx: AfArray,
+        seedy: AfArray,
+        radius: c_uint,
+        multiplier: c_uint,
+        iterations: c_int,
+        seg_val: c_double,
+    ) -> c_int;
+    fn af_iterative_deconv(
+        out: MutAfArray,
+        input: AfArray,
+        ker: AfArray,
+        iterations: c_uint,
+        rfactor: c_float,
+        algo: c_int,
+    ) -> c_int;
+    fn af_inverse_deconv(
+        out: MutAfArray,
+        input: AfArray,
+        ker: AfArray,
+        gamma: c_float,
+        algo: c_int,
+    ) -> c_int;
 }
 
 /// Calculate the gradients
@@ -1740,3 +1767,188 @@ where
     }
     temp.into()
 }
+
+/// Segment image based on similar pixel characteristics
+///
+/// This filter is similar to [regions](./fn.regions.html) with additional criteria for
+/// segmentation. In regions, all connected pixels are considered to be a single component.
+/// In this variation of connected components, pixels having similar pixel statistics of the
+/// neighborhoods around a given set of seed points are grouped together.
+///
+/// The parameter `radius` determines the size of neighborhood around a seed point.
+///
+/// Mean and Variance are the pixel statistics that are computed across all neighborhoods around
+/// the given set of seed points. The pixels which are connected to seed points and lie in the
+/// confidence interval are grouped together. Given below is the confidence interval.
+///
+/// \begin{equation}
+///     [\mu - \alpha * \sigma, \mu + \alpha * \sigma]
+/// \end{equation}
+/// where
+///
+/// - $ \mu $ is the mean of the pixels in the seed neighborhood
+/// - $ \sigma^2 $ is the variance of the pixels in the seed neighborhood
+/// - $ \alpha $ is the multiplier used to control the width of the confidence interval.
+///
+/// This filter follows an iterative approach for fine tuning the segmentation. An initial
+/// segmentation followed by a finite number `iterations` of segmentations are performed.
+/// The user provided parameter `iterations` is only a request and the algorithm can preempt
+/// the execution if variance approaches zero. The initial segmentation uses the mean and
+/// variance calculated from the neighborhoods of all the seed points. For subsequent
+/// segmentations, all pixels in the previous segmentation are used to re-calculate the mean
+/// and variance (as opposed to using the pixels in the neighborhood of the seed point).
+///
+/// # Parameters
+///
+/// - `input` is the input image
+/// - `seedx` contains the x coordinates of seeds in image coordinates
+/// - `seedy` contains the y coordinates of seeds in image coordinates
+/// - `radius` is the neighborhood region to be considered around each seed point
+/// - `multiplier` controls the threshold range computed from the mean and variance of seed point neighborhoods
+/// - `iterations` is the number of times the segmentation is performed
+/// - `segmented_val` is the value to which output array valid pixels are set to
+///
+/// # Return Values
+///
+/// Segmented(based on pixel characteristics) image(Array) with regions surrounding the seed points
+pub fn confidence_cc<InOutType>(
+    input: &Array<InOutType>,
+    seedx: &Array<u32>,
+    seedy: &Array<u32>,
+    radius: u32,
+    multiplier: u32,
+    iterations: u32,
+    segmented_val: f64,
+) -> Array<InOutType>
+where
+    InOutType: ConfidenceCCInput,
+{
+    let mut temp: i64 = 0;
+    unsafe {
+        let err_val = af_confidence_cc(
+            &mut temp as MutAfArray,
+            input.get() as AfArray,
+            seedx.get() as AfArray,
+            seedy.get() as AfArray,
+            radius,
+            multiplier,
+            iterations as i32,
+            segmented_val,
+        );
+        HANDLE_ERROR(AfError::from(err_val));
+    }
+    temp.into()
+}
+
+/// Iterative Deconvolution
+///
+/// The following table shows the iteration update equations of the respective
+/// deconvolution algorithms.
+///
+/// <table>
+/// <tr><th>Algorithm</th><th>Update Equation</th></tr>
+/// <tr>
+///     <td>LandWeber</td>
+///     <td>
+///         $ \hat{I}_{n} = \hat{I}_{n-1} + \alpha * P^T \otimes (I - P \otimes \hat{I}_{n-1}) $
+///     </td>
+/// </tr>
+/// <tr>
+///   <td>Richardson-Lucy</td>
+///   <td>
+///     $ \hat{I}_{n} = \hat{I}_{n-1} . ( \frac{I}{\hat{I}_{n-1} \otimes P} \otimes P^T ) $
+///   </td>
+/// </tr>
+/// </table>
+///
+/// where
+///
+/// - $ I $ is the observed(input/blurred) image
+/// - $ P $ is the point spread function
+/// - $ P^T $ is the transpose of point spread function
+/// - $ \hat{I}_{n} $ is the current iteration's updated image estimate
+/// - $ \hat{I}_{n-1} $ is the previous iteration's image estimate
+/// - $ \alpha $ is the relaxation factor
+/// - $ \otimes $ indicates the convolution operator
+///
+/// The type of output Array from deconvolution will be of type f64 if
+/// the input array type is f64. For other types, output type will be f32 type.
+/// Should the caller want to save the image to disk or require the values of output
+/// to be in a fixed range, that should be done by the caller explicitly.
+pub fn iterative_deconv<T>(
+    input: &Array<T>,
+    kernel: &Array<f32>,
+    iterations: u32,
+    relaxation_factor: f32,
+    algo: IterativeDeconvAlgo,
+) -> Array<T::AbsOutType>
+where
+    T: DeconvInput,
+    T::AbsOutType: HasAfEnum,
+{
+    let mut temp: i64 = 0;
+    unsafe {
+        let err_val = af_iterative_deconv(
+            &mut temp as MutAfArray,
+            input.get() as AfArray,
+            kernel.get() as AfArray,
+            iterations,
+            relaxation_factor,
+            algo as c_int,
+        );
+        HANDLE_ERROR(AfError::from(err_val));
+    }
+    temp.into()
+}
+
+/// Inverse deconvolution
+///
+/// This is a linear algorithm, i.e. it is non-iterative in
+/// nature and usually faster than iterative deconvolution algorithms.
+///
+/// Depending on the values passed on to `algo` of type enum [InverseDeconvAlgo](./enum.InverseDeconvAlgo.html),
+/// different equations are used to compute the final result.
+///
+/// #### Tikhonov's Deconvolution Method:
+///
+/// The update equation for this algorithm is as follows:
+///
+/// <div>
+/// \begin{equation}
+/// \hat{I}_{\omega} = \frac{ I_{\omega} * P^{*}_{\omega} } { |P_{\omega}|^2 + \gamma }
+/// \end{equation}
+/// </div>
+///
+/// where
+///
+/// - $ I_{\omega} $ is the observed(input/blurred) image in frequency domain
+/// - $ P_{\omega} $ is the point spread function in frequency domain
+/// - $ \gamma $ is a user defined regularization constant
+///
+/// The type of output Array from deconvolution will be double if the input array type is double.
+/// Otherwise, it will be float in rest of the cases. Should the caller want to save the image to
+/// disk or require the values of output to be in a fixed range, that should be done by the caller
+/// explicitly.
+pub fn inverse_deconv<T>(
+    input: &Array<T>,
+    kernel: &Array<f32>,
+    gamma: f32,
+    algo: InverseDeconvAlgo,
+) -> Array<T::AbsOutType>
+where
+    T: DeconvInput,
+    T::AbsOutType: HasAfEnum,
+{
+    let mut temp: i64 = 0;
+    unsafe {
+        let err_val = af_inverse_deconv(
+            &mut temp as MutAfArray,
+            input.get() as AfArray,
+            kernel.get() as AfArray,
+            gamma,
+            algo as c_int,
+        );
+        HANDLE_ERROR(AfError::from(err_val));
+    }
+    temp.into()
+}
diff --git a/src/lapack/mod.rs b/src/lapack/mod.rs
index e93e6c3f6..b7ba256da 100644
--- a/src/lapack/mod.rs
+++ b/src/lapack/mod.rs
@@ -24,6 +24,7 @@ extern "C" {
     fn af_det(det_real: MutDouble, det_imag: MutDouble, input: AfArray) -> c_int;
     fn af_norm(out: MutDouble, input: AfArray, ntype: c_uint, p: c_double, q: c_double) -> c_int;
     fn af_is_lapack_available(out: *mut c_int) -> c_int;
+    fn af_pinverse(out: MutAfArray, input: AfArray, tol: c_double, options: c_int) -> c_int;
 }
 
 /// Perform Singular Value Decomposition
@@ -496,3 +497,37 @@ pub fn is_lapack_available() -> bool {
     }
     temp > 0 // Return boolean fla
 }
+
+/// Pseudo Inverse of Matrix
+///
+/// # Parameters
+///
+/// - `input` is input matrix
+/// - `tolerance` defines the lower threshold for singular values from SVD
+/// - `option` must be [MatProp::NONE](./enum.MatProp.html) (more options might be supported in the future)
+///
+/// Notes:
+///
+/// - Tolerance is not the actual lower threshold, but it is passed in as a
+///   parameter to the calculation of the actual threshold relative to the shape and contents of input.
+/// - First, try setting tolerance to 1e-6 for single precision and 1e-12 for double.
+///
+/// # Return
+///
+/// Pseudo Inverse matrix for the input matrix
+pub fn pinverse<T>(input: &Array<T>, tolerance: f64, option: MatProp) -> Array<T>
+where
+    T: HasAfEnum + FloatingPoint,
+{
+    let mut out: i64 = 0;
+    unsafe {
+        let err_val = af_pinverse(
+            &mut out as MutAfArray,
+            input.get() as AfArray,
+            tolerance,
+            option as c_int,
+        );
+        HANDLE_ERROR(AfError::from(err_val));
+    }
+    out.into()
+}
diff --git a/src/lib.rs b/src/lib.rs
index 983f1c7fa..c57f60f4b 100755
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -71,6 +71,9 @@ mod dim4;
 pub use crate::error::{handle_error_general, register_error_handler, Callback, ErrorCallback};
 mod error;
 
+pub use crate::event::*;
+mod event;
+
 #[cfg(feature = "indexing")]
 pub use crate::index::*;
 #[cfg(feature = "indexing")]
@@ -96,6 +99,11 @@ pub use crate::lapack::*;
 #[cfg(feature = "lapack")]
 mod lapack;
 
+#[cfg(feature = "machine_learning")]
+pub use crate::machinelearning::*;
+#[cfg(feature = "machine_learning")]
+mod machinelearning;
+
 #[cfg(feature = "macros")]
 mod macros;
 mod num;
@@ -122,6 +130,7 @@ mod statistics;
 
 pub use crate::util::{get_size, HasAfEnum, ImplicitPromote};
 pub use crate::util::{ComplexFloating, FloatingPoint, RealFloating, RealNumber};
+pub use crate::util::{ConfidenceCCInput, DeconvInput, ReduceByKeyInput};
 pub use crate::util::{CovarianceComputable, EdgeComputable, MedianComputable, MomentsComputable};
 pub use crate::util::{GrayRGBConvertible, ImageFilterType, ImageNativeType, Scanable};
 mod util;
diff --git a/src/machinelearning.rs b/src/machinelearning.rs
new file mode 100644
index 000000000..372f22dfa
--- /dev/null
+++ b/src/machinelearning.rs
@@ -0,0 +1,144 @@
+extern crate libc;
+
+use self::libc::{c_int, c_longlong, c_uint};
+use crate::array::Array;
+use crate::defines::{AfError, ConvGradientType};
+use crate::dim4::Dim4;
+use crate::error::HANDLE_ERROR;
+use crate::util::{AfArray, DimT, HasAfEnum, MutAfArray, RealFloating};
+
+#[allow(dead_code)]
+extern "C" {
+    fn af_convolve2_nn(
+        out: MutAfArray,
+        signal: AfArray,
+        filter: AfArray,
+        stride_dims: c_uint,
+        strides: *const DimT,
+        padding_dim: c_uint,
+        paddings: *const DimT,
+        dilation_dim: c_uint,
+        dilations: *const DimT,
+    ) -> c_int;
+
+    fn af_convolve2_gradient_nn(
+        out: MutAfArray,
+        incoming_gradient: AfArray,
+        original_signal: AfArray,
+        original_filter: AfArray,
+        convolved_output: AfArray,
+        stride_dims: c_uint,
+        strides: *const DimT,
+        padding_dims: c_uint,
+        paddings: *const DimT,
+        dilation_dims: c_uint,
+        dilations: *const DimT,
+        grad_type: c_int,
+    ) -> c_int;
+}
+
+/// Convolution Integral for two dimensional data
+///
+/// This version of convolution is consistent with the machine learning formulation
+/// that will spatially convolve a filter on 2-dimensions against a signal. Multiple
+/// signals and filters can be batched against each other. Furthermore, the signals
+/// and filters can be multi-dimensional however their dimensions must match. Usually,
+/// this is the forward pass convolution in ML
+///
+/// Example:
+///
+/// Signals with dimensions: d0 x d1 x d2 x Ns
+///
+/// Filters with dimensions: d0 x d1 x d2 x Nf
+///
+/// Resulting Convolution: d0 x d1 x Nf x Ns
+///
+/// # Parameters
+///
+/// - `signal` is the input signal
+/// - `filter` is convolution filter
+/// - `strides` are distance between consecutive elements along each dimension for original convolution
+/// - `padding` specifies padding width along each dimension for original convolution
+/// - `dilation` specifies filter dilation along each dimension for original convolution
+///
+/// # Return Values
+///
+/// Convolved Array
+pub fn convolve2_nn<T>(
+    signal: &Array<T>,
+    filter: &Array<T>,
+    strides: Dim4,
+    padding: Dim4,
+    dilation: Dim4,
+) -> Array<T>
+where
+    T: HasAfEnum + RealFloating,
+{
+    let mut temp: i64 = 0;
+    unsafe {
+        let err_val = af_convolve2_nn(
+            &mut temp as MutAfArray,
+            signal.get() as AfArray,
+            filter.get() as AfArray,
+            strides.ndims() as c_uint,
+            strides.get().as_ptr() as *const c_longlong,
+            padding.ndims() as c_uint,
+            padding.get().as_ptr() as *const c_longlong,
+            dilation.ndims() as c_uint,
+            dilation.get().as_ptr() as *const c_longlong,
+        );
+        HANDLE_ERROR(AfError::from(err_val));
+    }
+    temp.into()
+}
+
+/// Backward pass gradient of 2D convolution
+///
+/// # Parameters
+///
+/// - `incoming_grad` gradients to be distributed in backwards pass
+/// - `original_signal` input signal to forward pass of convolution assumed structure of input is ( d0 x d1 x d2 x N )
+/// - `original_filter` input filter to forward pass of convolution assumed structure of input is ( d0 x d1 x d2 x N )
+/// - `convolved_output` output from forward pass of convolution
+/// - `strides` are distance between consecutive elements along each dimension for original convolution
+/// - `padding` specifies padding width along each dimension for original convolution
+/// - `dilation` specifies filter dilation along each dimension for original convolution
+/// - `grad_type` specifies which gradient to return
+///
+/// # Return Values
+///
+/// Gradient Array w.r.t input generated from [convolve2_nn](./fn.convolve2_nn.html)
+#[allow(clippy::too_many_arguments)]
+pub fn convolve2_gradient_nn<T>(
+    incoming_grad: &Array<T>,
+    original_signal: &Array<T>,
+    original_filter: &Array<T>,
+    convolved_output: &Array<T>,
+    strides: Dim4,
+    padding: Dim4,
+    dilation: Dim4,
+    grad_type: ConvGradientType,
+) -> Array<T>
+where
+    T: HasAfEnum + RealFloating,
+{
+    let mut temp: i64 = 0;
+    unsafe {
+        let err_val = af_convolve2_gradient_nn(
+            &mut temp as MutAfArray,
+            incoming_grad.get() as AfArray,
+            original_signal.get() as AfArray,
+            original_filter.get() as AfArray,
+            convolved_output.get() as AfArray,
+            strides.ndims() as c_uint,
+            strides.get().as_ptr() as *const c_longlong,
+            padding.ndims() as c_uint,
+            padding.get().as_ptr() as *const c_longlong,
+            dilation.ndims() as c_uint,
+            dilation.get().as_ptr() as *const c_longlong,
+            grad_type as c_int,
+        );
+        HANDLE_ERROR(AfError::from(err_val));
+    }
+    temp.into()
+}
diff --git a/src/signal/mod.rs b/src/signal/mod.rs
index 7a0111dc0..e837917b7 100644
--- a/src/signal/mod.rs
+++ b/src/signal/mod.rs
@@ -19,6 +19,36 @@ extern "C" {
         off_grid: c_float,
     ) -> c_int;
 
+    fn af_approx1_v2(
+        out: MutAfArray,
+        inp: AfArray,
+        pos: AfArray,
+        method: c_int,
+        off_grid: c_float,
+    ) -> c_int;
+
+    fn af_approx1_uniform(
+        out: MutAfArray,
+        inp: AfArray,
+        pos: AfArray,
+        interp_dim: c_int,
+        idx_start: c_double,
+        idx_step: c_double,
+        method: c_int,
+        off_grid: c_float,
+    ) -> c_int;
+
+    fn af_approx1_uniform_v2(
+        out: MutAfArray,
+        inp: AfArray,
+        pos: AfArray,
+        interp_dim: c_int,
+        idx_start: c_double,
+        idx_step: c_double,
+        method: c_int,
+        off_grid: c_float,
+    ) -> c_int;
+
     fn af_approx2(
         out: MutAfArray,
         inp: AfArray,
@@ -28,6 +58,45 @@ extern "C" {
         off_grid: c_float,
     ) -> c_int;
 
+    fn af_approx2_v2(
+        out: MutAfArray,
+        inp: AfArray,
+        pos0: AfArray,
+        pos1: AfArray,
+        method: c_int,
+        off_grid: c_float,
+    ) -> c_int;
+
+    fn af_approx2_uniform(
+        out: MutAfArray,
+        inp: AfArray,
+        pos0: AfArray,
+        interp_dim0: c_int,
+        idx_start_dim0: c_double,
+        idx_step_dim0: c_double,
+        pos1: AfArray,
+        interp_dim1: c_int,
+        idx_start_dim1: c_double,
+        idx_step_dim1: c_double,
+        method: c_int,
+        off_grid: c_float,
+    ) -> c_int;
+
+    fn af_approx2_uniform_v2(
+        out: MutAfArray,
+        inp: AfArray,
+        pos0: AfArray,
+        interp_dim0: c_int,
+        idx_start_dim0: c_double,
+        idx_step_dim0: c_double,
+        pos1: AfArray,
+        interp_dim1: c_int,
+        idx_start_dim1: c_double,
+        idx_step_dim1: c_double,
+        method: c_int,
+        off_grid: c_float,
+    ) -> c_int;
+
     fn af_set_fft_plan_cache_size(cache_size: size_t) -> c_int;
 
     fn af_fft(out: MutAfArray, arr: AfArray, nfac: c_double, odim0: c_longlong) -> c_int;
@@ -120,7 +189,6 @@ extern "C" {
 /// # Return Values
 ///
 /// An Array with interpolated values
-#[allow(unused_mut)]
 pub fn approx1<T, P>(
     input: &Array<T>,
     pos: &Array<P>,
@@ -145,7 +213,105 @@ where
     temp.into()
 }
 
-#[allow(unused_mut)]
+/// Same as [approx1](./fn.approx1.html) but writes into the existing `output` Array
+pub fn approx1_v2<T, P>(
+    output: &mut Array<T>,
+    input: &Array<T>,
+    pos: &Array<P>,
+    method: InterpType,
+    off_grid: f32,
+) where
+    T: HasAfEnum + FloatingPoint,
+    P: HasAfEnum + RealFloating,
+{
+    unsafe {
+        let err_val = af_approx1_v2(
+            &mut output.get() as MutAfArray, // C reads `*out`: pass the handle's address, not its value
+            input.get() as AfArray,
+            pos.get() as AfArray,
+            method as c_int,
+            off_grid as c_float,
+        );
+        HANDLE_ERROR(AfError::from(err_val));
+    }
+}
+
+/// Perform signal interpolation for 1d signals along specified dimension
+///
+/// # Parameters
+///
+/// - `input` is the input Array
+/// - `pos` Array contains the interpolation locations
+/// - `interp_dim` is the dimension along which interpolation is performed
+/// - `start` is the first index along `interp_dim`
+/// - `step` is the uniform spacing value between subsequent indices along `interp_dim`
+/// - `method` indicates the type of interpolation method that be used. It is of type enum
+/// [InterpType](./enum.InterpType.html)
+/// - `off_grid` is the value that will set in the output Array when certain index is out of bounds
+///
+/// # Return Values
+///
+/// An Array with interpolated values
+pub fn approx1_uniform<T, P>(
+    input: &Array<T>,
+    pos: &Array<P>,
+    interp_dim: i32,
+    start: f64,
+    step: f64,
+    method: InterpType,
+    off_grid: f32,
+) -> Array<T>
+where
+    T: HasAfEnum + FloatingPoint,
+    P: HasAfEnum + RealFloating,
+{
+    let mut temp: i64 = 0;
+    unsafe {
+        let err_val = af_approx1_uniform(
+            &mut temp as MutAfArray,
+            input.get() as AfArray,
+            pos.get() as AfArray,
+            interp_dim,
+            start,
+            step,
+            method as c_int,
+            off_grid,
+        );
+        HANDLE_ERROR(AfError::from(err_val));
+    }
+    temp.into()
+}
+
+/// Same as [approx1_uniform](./fn.approx1_uniform.html) but writes into the existing `output` Array
+#[allow(clippy::too_many_arguments)]
+pub fn approx1_uniform_v2<T, P>(
+    output: &mut Array<T>,
+    input: &Array<T>,
+    pos: &Array<P>,
+    interp_dim: i32,
+    start: f64,
+    step: f64,
+    method: InterpType,
+    off_grid: f32,
+) where
+    T: HasAfEnum + FloatingPoint,
+    P: HasAfEnum + RealFloating,
+{
+    unsafe {
+        let err_val = af_approx1_uniform_v2(
+            &mut output.get() as MutAfArray, // C reads `*out`: pass the handle's address, not its value
+            input.get() as AfArray,
+            pos.get() as AfArray,
+            interp_dim,
+            start,
+            step,
+            method as c_int,
+            off_grid,
+        );
+        HANDLE_ERROR(AfError::from(err_val));
+    }
+}
+
 /// Perform signal interpolation for 2d signals
 ///
 /// # Parameters
@@ -186,6 +352,128 @@ where
     temp.into()
 }
 
+/// Same as [approx2](./fn.approx2.html) but writes into the existing `output` Array
+pub fn approx2_v2<T, P>(
+    output: &mut Array<T>,
+    input: &Array<T>,
+    pos0: &Array<P>,
+    pos1: &Array<P>,
+    method: InterpType,
+    off_grid: f32,
+) where
+    T: HasAfEnum + FloatingPoint,
+    P: HasAfEnum + RealFloating,
+{
+    unsafe {
+        let err_val = af_approx2_v2(
+            &mut output.get() as MutAfArray, // C reads `*out`: pass the handle's address, not its value
+            input.get() as AfArray,
+            pos0.get() as AfArray,
+            pos1.get() as AfArray,
+            method as c_int,
+            off_grid as c_float,
+        );
+        HANDLE_ERROR(AfError::from(err_val));
+    }
+}
+
+/// Perform signal interpolation for 2d signals along a specified dimension
+///
+/// # Parameters
+///
+/// - `input` is the input Array
+/// - `pos0` Array contains the interpolation locations for first dimension
+/// - `interp_dim0` is the dimension along which interpolation is performed
+/// - `start0` is the first index along `interp_dim0`
+/// - `step0` is the uniform spacing value between subsequent indices along `interp_dim0`
+/// - `pos1` Array contains the interpolation locations for second dimension
+/// - `interp_dim1` is the dimension along which interpolation is performed
+/// - `start1` is the first index along `interp_dim1`
+/// - `step1` is the uniform spacing value between subsequent indices along `interp_dim1`
+/// - `method` indicates the type of interpolation method that be used. It is of type enum
+/// [InterpType](./enum.InterpType.html)
+/// - `off_grid` is the value that will set in the output Array when certain index is out of bounds
+///
+/// # Return Values
+///
+/// An Array with interpolated values
+#[allow(clippy::too_many_arguments)]
+pub fn approx2_uniform<T, P>(
+    input: &Array<T>,
+    pos0: &Array<P>,
+    interp_dim0: i32,
+    start0: f64,
+    step0: f64,
+    pos1: &Array<P>,
+    interp_dim1: i32,
+    start1: f64,
+    step1: f64,
+    method: InterpType,
+    off_grid: f32,
+) -> Array<T>
+where
+    T: HasAfEnum + FloatingPoint,
+    P: HasAfEnum + RealFloating,
+{
+    let mut temp: i64 = 0;
+    unsafe {
+        let err_val = af_approx2_uniform(
+            &mut temp as MutAfArray,
+            input.get() as AfArray,
+            pos0.get() as AfArray,
+            interp_dim0,
+            start0,
+            step0,
+            pos1.get() as AfArray,
+            interp_dim1,
+            start1,
+            step1,
+            method as c_int,
+            off_grid as c_float,
+        );
+        HANDLE_ERROR(AfError::from(err_val));
+    }
+    temp.into()
+}
+
+/// Same as [approx2_uniform](./fn.approx2_uniform.html) but writes into the existing `output` Array
+#[allow(clippy::too_many_arguments)]
+pub fn approx2_uniform_v2<T, P>(
+    output: &mut Array<T>,
+    input: &Array<T>,
+    pos0: &Array<P>,
+    interp_dim0: i32,
+    start0: f64,
+    step0: f64,
+    pos1: &Array<P>,
+    interp_dim1: i32,
+    start1: f64,
+    step1: f64,
+    method: InterpType,
+    off_grid: f32,
+) where
+    T: HasAfEnum + FloatingPoint,
+    P: HasAfEnum + RealFloating,
+{
+    unsafe {
+        let err_val = af_approx2_uniform_v2(
+            &mut output.get() as MutAfArray, // C reads `*out`: pass the handle's address, not its value
+            input.get() as AfArray,
+            pos0.get() as AfArray,
+            interp_dim0,
+            start0,
+            step0,
+            pos1.get() as AfArray,
+            interp_dim1,
+            start1,
+            step1,
+            method as c_int,
+            off_grid as c_float,
+        );
+        HANDLE_ERROR(AfError::from(err_val));
+    }
+}
+
 /// Set fft plan cache size
 ///
 /// Though this is a low overhead function, it is advised not to change
diff --git a/src/statistics/mod.rs b/src/statistics/mod.rs
index 793f2f5ed..d20a7d8cf 100644
--- a/src/statistics/mod.rs
+++ b/src/statistics/mod.rs
@@ -2,7 +2,7 @@ extern crate libc;
 
 use self::libc::{c_int, c_uint};
 use crate::array::Array;
-use crate::defines::{AfError, TopkFn};
+use crate::defines::{AfError, TopkFn, VarianceBias};
 use crate::error::HANDLE_ERROR;
 use crate::util::{AfArray, DimT, MutAfArray, MutDouble};
 use crate::util::{CovarianceComputable, RealNumber};
@@ -37,6 +37,15 @@ extern "C" {
         dim: c_int,
         order: c_uint,
     ) -> c_int;
+
+    fn af_meanvar(
+        mean: MutAfArray, // out: pointer through which the mean Array handle is returned
+        var: MutAfArray, // out: pointer through which the variance Array handle is returned
+        input: AfArray,
+        weights: AfArray,
+        bias: c_int, // `VarianceBias` value cast to a C int by the wrapper
+        dim: DimT,
+    ) -> c_int; // status code; the wrapper converts it with `AfError::from`
 }
 
 /// Find the median along a given dimension
@@ -413,3 +422,46 @@ where
     }
     (t0.into(), t1.into())
 }
+
+/// Compute the mean and the variance of an Array with a single library call
+///
+///# Parameters
+///
+/// - `input` is the Array whose statistics are computed
+/// - `weights` is the Array of weights applied while computing the statistics
+/// - `bias` selects the type of bias used for the variance calculation
+/// - `dim` is the dimension along which the statistics are computed
+///
+///# Return Values
+///
+/// A tuple of two Arrays. Both have the same size as `input`, except along `dim`,
+/// where the size is reduced to one.
+///
+/// - First Array holds the mean values
+/// - Second Array holds the variance values
+pub fn meanvar<T, W>(
+    input: &Array<T>,
+    weights: &Array<W>,
+    bias: VarianceBias,
+    dim: i64,
+) -> (Array<T::MeanOutType>, Array<T::MeanOutType>)
+where
+    T: HasAfEnum,
+    T::MeanOutType: HasAfEnum,
+    W: HasAfEnum + RealFloating,
+{
+    // Output handles, zero-initialised and handed to the library by pointer below.
+    let (mut mean_handle, mut var_handle): (i64, i64) = (0, 0);
+    unsafe {
+        let status = af_meanvar(
+            &mut mean_handle as MutAfArray,
+            &mut var_handle as MutAfArray,
+            input.get() as AfArray,
+            weights.get() as AfArray,
+            bias as c_int,
+            dim as DimT,
+        );
+        HANDLE_ERROR(AfError::from(status));
+    }
+    (mean_handle.into(), var_handle.into())
+}
diff --git a/src/util.rs b/src/util.rs
index e2db215b1..5576f4701 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -1,3 +1,4 @@
+extern crate half;
 extern crate libc;
 extern crate num;
 
@@ -12,6 +13,7 @@ use crate::num::Zero;
 use std::mem;
 
 pub type AfArray = self::libc::c_longlong;
+pub type AfEvent = self::libc::c_longlong;
 pub type AfIndex = self::libc::c_longlong;
 pub type CellPtr = *const self::libc::c_void;
 pub type Complex32 = Complex<f32>;
@@ -20,6 +22,7 @@ pub type DimT = self::libc::c_longlong;
 pub type Feat = *const self::libc::c_void;
 pub type Intl = self::libc::c_longlong;
 pub type MutAfArray = *mut self::libc::c_longlong;
+pub type MutAfEvent = *mut self::libc::c_longlong;
 pub type MutAfIndex = *mut self::libc::c_longlong;
 pub type MutDimT = *mut self::libc::c_longlong;
 pub type MutDouble = *mut self::libc::c_double;
@@ -332,6 +335,22 @@ impl HasAfEnum for u16 {
         DType::U16
     }
 }
+impl HasAfEnum for half::f16 {
+    type InType = Self;
+    type BaseType = Self;
+    type AbsOutType = Self;
+    type ArgOutType = Self;
+    type UnaryOutType = Self;
+    type ComplexOutType = Complex<half::f16>;
+    type MeanOutType = Self;
+    type AggregateOutType = f32; // NOTE(review): f32 rather than Self, presumably for precision; confirm
+    type ProductOutType = f32; // NOTE(review): f32 rather than Self, same as AggregateOutType; confirm
+    type SobelOutType = Self;
+
+    fn get_af_dtype() -> DType {
+        DType::F16 // type tag reported for half-precision elements
+    }
+}
 impl HasAfEnum for i32 {
     type InType = Self;
     type BaseType = Self;
@@ -772,3 +791,25 @@ impl CovarianceComputable for u16 {}
 impl CovarianceComputable for u8 {}
 impl CovarianceComputable for u64 {}
 impl CovarianceComputable for i64 {}
+
+/// Marker trait for element types accepted as confidence connected components input
+pub trait ConfidenceCCInput: HasAfEnum {}
+
+impl ConfidenceCCInput for f32 {}
+impl ConfidenceCCInput for u32 {}
+impl ConfidenceCCInput for u16 {}
+impl ConfidenceCCInput for u8 {}
+
+/// Marker trait for element types accepted as deconvolution input
+pub trait DeconvInput: HasAfEnum {}
+
+impl DeconvInput for f32 {}
+impl DeconvInput for i16 {}
+impl DeconvInput for u16 {}
+impl DeconvInput for u8 {}
+
+/// Marker trait for types usable as the key type of reduce-by-key operations
+pub trait ReduceByKeyInput: HasAfEnum {}
+
+impl ReduceByKeyInput for i32 {}
+impl ReduceByKeyInput for u32 {}

| Algorithm       | Update Equation |
|-----------------|-----------------|
| LandWeber       | $ \hat{I}_{n} = \hat{I}_{n-1} + \alpha * P^T \otimes (I - P \otimes \hat{I}_{n-1}) $ |
| Richardson-Lucy | $ \hat{I}_{n} = \hat{I}_{n-1} . ( \frac{I}{\hat{I}_{n-1} \otimes P} \otimes P^T ) $ |