Skip to content

Implement parallelization using rayon #252

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 13 commits into from
Closed
9 changes: 7 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,24 @@ optional = true
blas-sys = { version = "0.6.5", optional = true, default-features = false }
matrixmultiply = { version = "0.1.13" }

rayon = { version = "0.6.0", optional = true }

[dependencies.serde]
version = "0.8.20"
optional = true

[dev-dependencies]
num_cpus = "1.2"

[features]
blas = ["blas-sys"]

# These features are used for testing
blas-openblas-sys = ["blas"]
test = ["blas-openblas-sys"]
test = ["blas-openblas-sys", "rayon"]

# This feature is used for docs
docs = ["rustc-serialize", "serde"]
docs = ["rustc-serialize", "serde", "rayon"]

[profile.release]
[profile.bench]
Expand Down
5 changes: 5 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ your `Cargo.toml`.
Uses ``blas-sys`` for a pluggable backend, which needs to be configured
separately.

- ``rayon``

- Optional, compatible with Rust stable
- Implements parallelization using rayon 0.6.

How to use with cargo::

[dependencies]
Expand Down
114 changes: 114 additions & 0 deletions benches/rayon.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@

#![feature(test)]

extern crate num_cpus;
extern crate test;
use test::Bencher;

#[macro_use(s)]
extern crate ndarray;
use ndarray::prelude::*;

extern crate rayon;
use rayon::prelude::*;

const EXP_N: usize = 128;

use std::cmp::max;

/// Initialize rayon's global thread pool with half the logical CPUs
/// (at least one), so the benchmarks leave headroom on the machine.
/// The result of `rayon::initialize` is deliberately ignored: the pool
/// may already have been configured by an earlier bench in this run.
fn set_threads() {
    let threads = max(num_cpus::get() / 2, 1);
    let config = rayon::Configuration::new().set_num_threads(threads);
    let _ = rayon::initialize(config);
}

/// Baseline: sequential `exp` over a transposed (axis-swapped) array,
/// so iteration does not follow memory order.
#[bench]
fn map_exp_regular(bench: &mut Bencher)
{
    let mut arr = Array2::<f64>::zeros((EXP_N, EXP_N));
    arr.swap_axes(0, 1);
    bench.iter(|| arr.mapv_inplace(f64::exp));
}

/// Parallel counterpart of `map_exp_regular`, driven through
/// rayon's parallel iterator over a mutable view.
#[bench]
fn rayon_exp_regular(bench: &mut Bencher)
{
    set_threads();
    let mut arr = Array2::<f64>::zeros((EXP_N, EXP_N));
    arr.swap_axes(0, 1);
    bench.iter(|| {
        arr.view_mut()
            .into_par_iter()
            .for_each(|elt| *elt = elt.exp());
    });
}

const FASTEXP: usize = 800;

/// Cheap approximation of `exp(x)` using `(1 + x/1024)^1024`,
/// i.e. the compound-interest limit truncated at n = 1024.
#[inline]
fn fastexp(x: f64) -> f64 {
    let base = 1. + x / 1024.;
    base.powi(1024)
}

/// Baseline: sequential `fastexp` over a contiguous array.
#[bench]
fn map_fastexp_regular(bench: &mut Bencher)
{
    let mut arr = Array2::<f64>::zeros((FASTEXP, FASTEXP));
    bench.iter(|| arr.mapv_inplace(fastexp));
}

/// Parallel counterpart of `map_fastexp_regular`.
#[bench]
fn rayon_fastexp_regular(bench: &mut Bencher)
{
    set_threads();
    let mut arr = Array2::<f64>::zeros((FASTEXP, FASTEXP));
    bench.iter(|| {
        arr.view_mut()
            .into_par_iter()
            .for_each(|elt| *elt = fastexp(*elt));
    });
}

/// Baseline: sequential `fastexp` over a sliced (non-contiguous) view —
/// the last column is cut off so rows are no longer back-to-back in memory.
#[bench]
fn map_fastexp_cut(bench: &mut Bencher)
{
    let mut backing = Array2::<f64>::zeros((FASTEXP, FASTEXP));
    let mut view = backing.slice_mut(s![.., ..-1]);
    bench.iter(|| view.mapv_inplace(fastexp));
}

/// Parallel counterpart of `map_fastexp_cut`: same non-contiguous slice,
/// processed through rayon's parallel iterator.
#[bench]
fn rayon_fastexp_cut(bench: &mut Bencher)
{
    set_threads();
    let mut backing = Array2::<f64>::zeros((FASTEXP, FASTEXP));
    let mut view = backing.slice_mut(s![.., ..-1]);
    bench.iter(|| {
        view.view_mut()
            .into_par_iter()
            .for_each(|elt| *elt = fastexp(*elt));
    });
}

/// Baseline: sequential `fastexp` applied one row (axis-0 lane) at a time.
#[bench]
fn map_fastexp_by_axis(bench: &mut Bencher)
{
    let mut arr = Array2::<f64>::zeros((FASTEXP, FASTEXP));
    bench.iter(|| {
        for mut lane in arr.axis_iter_mut(Axis(0)) {
            lane.mapv_inplace(fastexp)
        }
    });
}

/// Parallel counterpart of `map_fastexp_by_axis`: rows are distributed
/// across the rayon pool, each row mapped sequentially.
#[bench]
fn rayon_fastexp_by_axis(bench: &mut Bencher)
{
    set_threads();
    let mut arr = Array2::<f64>::zeros((FASTEXP, FASTEXP));
    bench.iter(|| {
        arr.axis_iter_mut(Axis(0))
            .into_par_iter()
            .for_each(|mut lane| lane.mapv_inplace(fastexp));
    });
}
3 changes: 3 additions & 0 deletions src/iterators/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ use super::{
Axis,
};

#[cfg(feature = "rayon")]
pub mod par;

/// Base for array iterators
///
/// Iterator element type is `&'a A`.
Expand Down
225 changes: 225 additions & 0 deletions src/iterators/par.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@


use rayon::par_iter::ParallelIterator;
use rayon::par_iter::IntoParallelIterator;
use rayon::par_iter::IndexedParallelIterator;
use rayon::par_iter::ExactParallelIterator;
use rayon::par_iter::BoundedParallelIterator;
use rayon::par_iter::internal::{Consumer, UnindexedConsumer};
use rayon::par_iter::internal::bridge;
use rayon::par_iter::internal::bridge_unindexed;
use rayon::par_iter::internal::ProducerCallback;
use rayon::par_iter::internal::Producer;
use rayon::par_iter::internal::UnindexedProducer;
#[cfg(rayon_fold_with)]
use rayon::par_iter::internal::Folder;

use super::AxisIter;
use super::AxisIterMut;
use imp_prelude::*;

/// Wrapper type for parallelized implementations.
///
/// Holds an ndarray iterator or view and carries the rayon parallel
/// iterator trait impls (generated by the macros below) for it.
///
/// **Requires crate feature `"rayon"`**
#[derive(Copy, Clone, Debug)]
pub struct Parallel<I> {
    // The wrapped sequential iterator / view.
    iter: I,
}

// Generates the full set of rayon (0.6) parallel-iterator impls for an
// ndarray iterator type that is an `ExactSizeIterator` and has an inherent
// `split_at(usize)` method — used below for `AxisIter` / `AxisIterMut`.
macro_rules! par_iter_wrapper {
    // thread_bounds are either Sync or Send + Sync
    ($iter_name:ident, [$($thread_bounds:tt)*]) => {
    /// This iterator can be turned into a parallel iterator (rayon crate).
    ///
    /// **Requires crate feature `"rayon"`**
    impl<'a, A, D> IntoParallelIterator for $iter_name<'a, A, D>
        where D: Dimension,
              A: $($thread_bounds)*,
    {
        type Item = <Self as Iterator>::Item;
        type Iter = Parallel<Self>;
        // Just wrap the sequential iterator; all the parallel driving
        // logic lives on the `Parallel` wrapper.
        fn into_par_iter(self) -> Self::Iter {
            Parallel {
                iter: self,
            }
        }
    }

    impl<'a, A, D> ParallelIterator for Parallel<$iter_name<'a, A, D>>
        where D: Dimension,
              A: $($thread_bounds)*,
    {
        type Item = <$iter_name<'a, A, D> as Iterator>::Item;
        // Even the "unindexed" entry point uses the indexed `bridge`,
        // since this iterator always knows its exact length.
        fn drive_unindexed<C>(self, consumer: C) -> C::Result
            where C: UnindexedConsumer<Self::Item>
        {
            bridge(self, consumer)
        }

        // Exact length is always available via `ExactSizeIterator`.
        fn opt_len(&mut self) -> Option<usize> {
            Some(self.iter.len())
        }
    }

    impl<'a, A, D> IndexedParallelIterator for Parallel<$iter_name<'a, A, D>>
        where D: Dimension,
              A: $($thread_bounds)*,
    {
        // Hand the underlying iterator itself to rayon as the `Producer`
        // (its `Producer` impl is further below).
        fn with_producer<Cb>(self, callback: Cb) -> Cb::Output
            where Cb: ProducerCallback<Self::Item>
        {
            callback.callback(self.iter)
        }
    }

    impl<'a, A, D> ExactParallelIterator for Parallel<$iter_name<'a, A, D>>
        where D: Dimension,
              A: $($thread_bounds)*,
    {
        fn len(&mut self) -> usize {
            ExactSizeIterator::len(&self.iter)
        }
    }

    impl<'a, A, D> BoundedParallelIterator for Parallel<$iter_name<'a, A, D>>
        where D: Dimension,
              A: $($thread_bounds)*,
    {
        // The upper bound is exact here — same value as `len`.
        fn upper_bound(&mut self) -> usize {
            ExactSizeIterator::len(&self.iter)
        }

        fn drive<C>(self, consumer: C) -> C::Result
            where C: Consumer<Self::Item>
        {
            bridge(self, consumer)
        }
    }

    // This is the real magic, I guess

    impl<'a, A, D> Producer for $iter_name<'a, A, D>
        where D: Dimension,
              A: $($thread_bounds)*,
    {
        // FIXME: No idea about what this is
        // NOTE(review): rayon's cost model is not documented here; the
        // element count is used as a proportional stand-in — confirm
        // against rayon 0.6's `Producer::cost` docs.
        fn cost(&mut self, len: usize) -> f64 {
            len as f64
        }

        // Delegates to the iterator's own inherent `split_at`.
        fn split_at(self, i: usize) -> (Self, Self) {
            self.split_at(i)
        }
    }
    }
}

// `AxisIter` yields shared subviews, so elements only need `Sync`;
// `AxisIterMut` yields mutable subviews, so elements need `Send + Sync`.
par_iter_wrapper!(AxisIter, [Sync]);
par_iter_wrapper!(AxisIterMut, [Send + Sync]);

/// A shared reference to an `Array` parallel-iterates over `&A` elements
/// by delegating to a borrowed view.
///
/// **Requires crate feature `"rayon"`**
impl<'a, A, D> IntoParallelIterator for &'a Array<A, D>
    where D: Dimension,
          A: Sync
{
    type Item = &'a A;
    type Iter = Parallel<ArrayView<'a, A, D>>;
    fn into_par_iter(self) -> Self::Iter {
        self.view().into_par_iter()
    }
}

/// A shared reference to an `RcArray` parallel-iterates over `&A` elements
/// by delegating to a borrowed view.
///
/// **Requires crate feature `"rayon"`**
impl<'a, A, D> IntoParallelIterator for &'a RcArray<A, D>
    where D: Dimension,
          A: Sync
{
    type Item = &'a A;
    type Iter = Parallel<ArrayView<'a, A, D>>;
    fn into_par_iter(self) -> Self::Iter {
        self.view().into_par_iter()
    }
}

/// A mutable reference to an `Array` parallel-iterates over `&mut A`
/// elements; `Send` is required because the mutable references are
/// handed to worker threads.
///
/// **Requires crate feature `"rayon"`**
impl<'a, A, D> IntoParallelIterator for &'a mut Array<A, D>
    where D: Dimension,
          A: Sync + Send
{
    type Item = &'a mut A;
    type Iter = Parallel<ArrayViewMut<'a, A, D>>;
    fn into_par_iter(self) -> Self::Iter {
        self.view_mut().into_par_iter()
    }
}

/// A mutable reference to an `RcArray` parallel-iterates over `&mut A`
/// elements. The extra `Clone` bound is presumably required because
/// `view_mut` on a shared-ownership array must first make the data
/// unique (copy-on-write) — confirm against `RcArray::view_mut`.
///
/// **Requires crate feature `"rayon"`**
impl<'a, A, D> IntoParallelIterator for &'a mut RcArray<A, D>
    where D: Dimension,
          A: Sync + Send + Clone,
{
    type Item = &'a mut A;
    type Iter = Parallel<ArrayViewMut<'a, A, D>>;
    fn into_par_iter(self) -> Self::Iter {
        self.view_mut().into_par_iter()
    }
}

// Generates `IntoParallelIterator` plus an *unindexed* `ParallelIterator`
// impl for an array view type. Work is distributed by recursively
// splitting the view (`UnindexedProducer`), not by element index.
macro_rules! par_iter_view_wrapper {
    // thread_bounds are either Sync or Send + Sync
    ($view_name:ident, [$($thread_bounds:tt)*]) => {
    /// This view can be turned into a parallel iterator over its
    /// elements (rayon crate).
    ///
    /// **Requires crate feature `"rayon"`**
    impl<'a, A, D> IntoParallelIterator for $view_name<'a, A, D>
        where D: Dimension,
              A: $($thread_bounds)*,
    {
        type Item = <Self as IntoIterator>::Item;
        type Iter = Parallel<Self>;
        fn into_par_iter(self) -> Self::Iter {
            Parallel {
                iter: self,
            }
        }
    }


    impl<'a, A, D> ParallelIterator for Parallel<$view_name<'a, A, D>>
        where D: Dimension,
              A: $($thread_bounds)*,
    {
        type Item = <$view_name<'a, A, D> as IntoIterator>::Item;
        // Unindexed bridge: rayon splits the view via the
        // `UnindexedProducer` impl below.
        fn drive_unindexed<C>(self, consumer: C) -> C::Result
            where C: UnindexedConsumer<Self::Item>
        {
            bridge_unindexed(self.iter, consumer)
        }

        fn opt_len(&mut self) -> Option<usize> {
            Some(self.iter.len())
        }
    }

    impl<'a, A, D> UnindexedProducer for $view_name<'a, A, D>
        where D: Dimension,
              A: $($thread_bounds)*,
    {
        // A view with at most one element cannot be split further.
        fn can_split(&self) -> bool {
            self.len() > 1
        }

        // Split at the midpoint of the axis with the greatest stride —
        // presumably so each half covers the most widely separated
        // memory and halves stay as compact as possible; confirm
        // against `max_stride_axis` docs.
        fn split(self) -> (Self, Self) {
            let max_axis = self.max_stride_axis();
            let mid = self.len_of(max_axis) / 2;
            let (a, b) = self.split_at(max_axis, mid);
            (a, b)
        }

        // NOTE(review): `rayon_fold_with` is a non-standard cfg flag —
        // confirm where it is defined (build script?). When enabled,
        // leaf views are consumed with a plain sequential fold.
        #[cfg(rayon_fold_with)]
        fn fold_with<F>(self, folder: F) -> F
            where F: Folder<Self::Item>,
        {
            self.into_iter().fold(folder, move |f, elt| f.consume(elt))
        }
    }

    }
}

// Read-only views need `Sync` elements; mutable views also need `Send`
// because `&mut A` items are moved across threads.
par_iter_view_wrapper!(ArrayView, [Sync]);
par_iter_view_wrapper!(ArrayViewMut, [Sync + Send]);
Loading