
pass dask compute/persist args through from load/compute/persist #1543


Merged · 10 commits · Sep 5, 2017
39 changes: 33 additions & 6 deletions xarray/core/dataarray.py
@@ -565,22 +565,31 @@ def reset_coords(self, names=None, drop=False, inplace=False):
         dataset[self.name] = self.variable
         return dataset

-    def load(self):
+    def load(self, **kwargs):
         """Manually trigger loading of this array's data from disk or a
         remote source into memory and return this array.

         Normally, it should not be necessary to call this method in user code,
         because all xarray functions should either work on deferred data or
         load data automatically. However, this method can be necessary when
         working with many file objects on disk.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.array.compute``.
+
+        See Also
+        --------
+        dask.array.compute
         """
-        ds = self._to_temp_dataset().load()
+        ds = self._to_temp_dataset().load(**kwargs)
         new = self._from_temp_dataset(ds)
         self._variable = new._variable
         self._coords = new._coords
         return self

-    def compute(self):
+    def compute(self, **kwargs):
         """Manually trigger loading of this array's data from disk or a
         remote source into memory and return a new array. The original is
         left unaltered.

@@ -589,18 +598,36 @@ def compute(self):
         because all xarray functions should either work on deferred data or
         load data automatically. However, this method can be necessary when
         working with many file objects on disk.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.array.compute``.
+
+        See Also
+        --------
+        dask.array.compute
         """
         new = self.copy(deep=False)
-        return new.load()
+        return new.load(**kwargs)

-    def persist(self):
+    def persist(self, **kwargs):
         """ Trigger computation in constituent dask arrays

         This keeps them as dask arrays but encourages them to keep data in
         memory. This is particularly useful when on a distributed machine.
         When on a single machine consider using ``.compute()`` instead.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.persist``.
+
+        See Also
+        --------
+        dask.persist
         """
-        ds = self._to_temp_dataset().persist()
+        ds = self._to_temp_dataset().persist(**kwargs)
         return self._from_temp_dataset(ds)

     def copy(self, deep=True):
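
With this change, scheduler options flow straight through xarray's methods. A minimal usage sketch, assuming the ``get=`` scheduler interface that dask exposed at the time of this PR (later dask versions replaced ``get=`` with ``scheduler=``):

    import dask.multiprocessing
    import xarray as xr

    arr = xr.DataArray([1, 2, 3]).chunk()
    doubled = arr * 2

    # kwargs are forwarded to dask.array.compute
    result = doubled.compute(get=dask.multiprocessing.get, num_workers=4)

    # persist materializes the chunks but keeps them as dask arrays
    persisted = doubled.persist(get=dask.multiprocessing.get)
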
43 changes: 35 additions & 8 deletions xarray/core/dataset.py
@@ -445,14 +445,23 @@ def sizes(self):
         """
         return self.dims

-    def load(self):
+    def load(self, **kwargs):
         """Manually trigger loading of this dataset's data from disk or a
         remote source into memory and return this dataset.

         Normally, it should not be necessary to call this method in user code,
         because all xarray functions should either work on deferred data or
         load data automatically. However, this method can be necessary when
         working with many file objects on disk.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.array.compute``.
+
+        See Also
+        --------
+        dask.array.compute
         """
         # access .data to coerce everything to numpy or dask arrays
         lazy_data = {k: v._data for k, v in self.variables.items()

@@ -461,7 +470,7 @@ def load(self):
             import dask.array as da

             # evaluate all the dask arrays simultaneously
-            evaluated_data = da.compute(*lazy_data.values())
+            evaluated_data = da.compute(*lazy_data.values(), **kwargs)

             for k, data in zip(lazy_data, evaluated_data):
                 self.variables[k].data = data

@@ -473,7 +482,7 @@

         return self

-    def compute(self):
+    def compute(self, **kwargs):
         """Manually trigger loading of this dataset's data from disk or a
         remote source into memory and return a new dataset. The original is
         left unaltered.

@@ -482,11 +491,20 @@ def compute(self):
         because all xarray functions should either work on deferred data or
         load data automatically. However, this method can be necessary when
         working with many file objects on disk.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.array.compute``.
+
+        See Also
+        --------
+        dask.array.compute
         """
         new = self.copy(deep=False)
-        return new.load()
+        return new.load(**kwargs)

-    def _persist_inplace(self):
+    def _persist_inplace(self, **kwargs):
         """ Persist all Dask arrays in memory """
         # access .data to coerce everything to numpy or dask arrays
         lazy_data = {k: v._data for k, v in self.variables.items()

@@ -495,24 +513,33 @@
             import dask

             # evaluate all the dask arrays simultaneously
-            evaluated_data = dask.persist(*lazy_data.values())
+            evaluated_data = dask.persist(*lazy_data.values(), **kwargs)

             for k, data in zip(lazy_data, evaluated_data):
                 self.variables[k].data = data

         return self

-    def persist(self):
+    def persist(self, **kwargs):
         """ Trigger computation, keeping data as dask arrays

         This operation can be used to trigger computation on underlying dask
         arrays, similar to ``.compute()``. However this operation keeps the
         data as dask arrays. This is particularly useful when using the
         dask.distributed scheduler and you want to load a large amount of data
         into distributed memory.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.persist``.
+
+        See Also
+        --------
+        dask.persist
         """
         new = self.copy(deep=False)
-        return new._persist_inplace()
+        return new._persist_inplace(**kwargs)

     @classmethod
     def _construct_direct(cls, variables, coord_names, dims=None, attrs=None,
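
Note why ``Dataset.load`` collects every lazy variable and makes a single ``da.compute`` call rather than computing variables one by one: dask merges the graphs, so shared inputs are evaluated once, and the forwarded ``**kwargs`` apply to the whole batch. A standalone sketch of the same pattern (illustrative, independent of xarray):

    import numpy as np
    import dask.array as da

    x = da.from_array(np.arange(10), chunks=5)
    lazy = {'y': x + 1, 'z': x * 2}  # two arrays sharing the chunks of x

    # one compute call: dask merges both graphs, reads x only once,
    # and kwargs such as num_workers apply to the merged graph
    y, z = da.compute(*lazy.values(), num_workers=2)
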
28 changes: 24 additions & 4 deletions xarray/core/variable.py
@@ -307,29 +307,49 @@ def data(self, data):
     def _indexable_data(self):
         return orthogonally_indexable(self._data)

-    def load(self):
+    def load(self, **kwargs):
         """Manually trigger loading of this variable's data from disk or a
         remote source into memory and return this variable.

         Normally, it should not be necessary to call this method in user code,
         because all xarray functions should either work on deferred data or
         load data automatically.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.array.compute``.
+
+        See Also
+        --------
+        dask.array.compute
         """
-        if not isinstance(self._data, np.ndarray):
+        if isinstance(self._data, dask_array_type):
+            self._data = np.asarray(self._data.compute(**kwargs))

Comment (Contributor): You don't want to invoke asarray if dask returns a scalar numpy type.

Comment (Member):
To be honest, we don't define what can go in _data as carefully as we ought to. I guess there are two ways to define it:

  • anything "array like" that defines at least shape, dtype and __getitem__
  • whatever comes out of xarray.core.variable.as_compatible_data

Numpy scalars do actually pass through here (since they define all of those attributes!), but they would then get converted into an array when calling .values anyway:

    @property
    def values(self):
        """The variable's data as a numpy.ndarray"""
        return _as_array_or_item(self._data)

So I guess I agree, but on the other hand I'm a little nervous that a dask routine might return a non-numpy scalar, which would definitely break if we don't wrap it in asarray. The safe thing to do is to leave this as is or to call as_compatible_data on it.
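
For context on the scalar concern above: ``np.asarray`` promotes scalars, numpy or otherwise, to 0-d arrays, which is what keeps ``_data`` array-like when a compute returns a scalar. A quick illustration:

    import numpy as np

    np.asarray(np.float64(3.0)).shape        # () -- a numpy scalar becomes a 0-d array
    np.asarray(3.0).shape                    # () -- a plain Python float does too
    isinstance(np.float64(3.0), np.ndarray)  # False: scalars are not ndarrays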

+        elif not isinstance(self._data, np.ndarray):

Comment (Contributor): This clause should be removed, as it causes inconsistent behaviour with numpy scalar types. I cannot think of any other use case where the data is neither a dask array nor a numpy ndarray.

Comment (Member Author): I think this allows for on-disk type arrays. @shoyer, any thoughts on calling np.asarray here and in the line above?

Comment (Member): Yes, we need this to support on-disk arrays that aren't backed by dask. (I'd love to get rid of this in favor of always using dask, but dask has some limitations that make this tricky.)
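
To make the on-disk case concrete, here is a hypothetical lazy wrapper (illustrative only, not xarray's actual backend class) that is neither a dask array nor an ndarray, which is exactly what the ``elif`` branch exists to handle:

    import numpy as np

    class LazyOnDiskArray:
        """Hypothetical stand-in for a non-dask, on-disk backend array."""

        def __init__(self, shape, dtype):
            self.shape = shape
            self.dtype = dtype

        def __getitem__(self, key):
            return self._read()[key]

        def __array__(self, dtype=None):
            # np.asarray(...) lands here and pulls the data into memory
            return self._read().astype(dtype or self.dtype)

        def _read(self):
            # a real backend would read from disk instead
            return np.zeros(self.shape, self.dtype)

    arr = LazyOnDiskArray((3,), np.dtype('float64'))
    np.asarray(arr)  # what the ``elif`` branch in Variable.load does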

             self._data = np.asarray(self._data)
         return self

-    def compute(self):
+    def compute(self, **kwargs):
         """Manually trigger loading of this variable's data from disk or a
         remote source into memory and return a new variable. The original is
         left unaltered.

         Normally, it should not be necessary to call this method in user code,
         because all xarray functions should either work on deferred data or
         load data automatically.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.array.compute``.
+
+        See Also
+        --------
+        dask.array.compute
         """
         new = self.copy(deep=False)
-        return new.load()
+        return new.load(**kwargs)

     @property
     def values(self):

50 changes: 49 additions & 1 deletion xarray/tests/test_dask.py
@@ -11,11 +11,12 @@
 from xarray.core.pycompat import suppress
 from . import TestCase, requires_dask

-from xarray.tests import unittest
+from xarray.tests import unittest, assert_equal

 with suppress(ImportError):
     import dask
     import dask.array as da
+    import dask.multiprocessing

Comment (Member): No longer used.

 class DaskTestCase(TestCase):

@@ -182,6 +183,26 @@ def test_bivariate_ufunc(self):
         self.assertLazyAndAllClose(np.maximum(u, 0), xu.maximum(v, 0))
         self.assertLazyAndAllClose(np.maximum(u, 0), xu.maximum(0, v))

+    def test_compute_args(self):
+        a = DataArray([1, 2]).chunk()
+        expected = DataArray([1, 4])
+        b = a * a
+        # compute
+        b1 = b.compute(get=dask.multiprocessing.get)

Comment (Member):
The clean way to test this is probably with mock, e.g.:

    In [63]: import numpy as np

    In [64]: import dask.array as da

    In [65]: from unittest import mock

    In [66]: x = da.from_array(np.arange(3), chunks=(2,))

    In [67]: with mock.patch.object(da.Array, 'compute', return_value=np.arange(3)) as mock_compute:
        ...:     x.compute(foo='bar')
        ...:

    In [68]: mock_compute.assert_called_with(foo='bar')

    In [69]: mock_compute.assert_called_with(bar='foo')
    ---------------------------------------------------------------------------
    AssertionError                            Traceback (most recent call last)
    <ipython-input-69-22937cf26ca1> in <module>()
    ----> 1 mock_compute.assert_called_with(bar='foo')

    ~/conda/envs/xarray-dev/lib/python3.5/unittest/mock.py in assert_called_with(_mock_self, *args, **kwargs)
        792         if expected != actual:
        793             cause = expected if isinstance(expected, Exception) else None
    --> 794             raise AssertionError(_error_message()) from cause
        795
        796

    AssertionError: Expected call: compute(bar='foo')
    Actual call: compute(foo='bar')

unittest.mock is part of Python 3's standard library, but there's also a widely used Python 2 backport on PyPI. I think it would be perfectly fine to add it as a dependency for our test suite.

Comment (Member Author): Thanks @shoyer, I'll add mock as part of this PR.
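
A sketch of what such a mock-based test could look like against this PR's implementation, where the kwargs end up in the ``da.compute`` call inside ``Dataset.load``; the test name and patch target are illustrative, not the code that eventually landed:

    import numpy as np
    from unittest import mock  # on Python 2, the ``mock`` backport from PyPI

    import xarray as xr

    def test_compute_forwards_kwargs():
        arr = xr.DataArray([1, 2]).chunk()
        with mock.patch('dask.array.compute',
                        return_value=(np.array([1, 2]),)) as compute:
            arr.compute(foo='bar')
        # the one lazy variable is passed positionally; kwargs come through
        compute.assert_called_with(mock.ANY, foo='bar')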

+        assert b1._in_memory
+        assert_equal(b1, expected)
+        b2 = b.compute(get=dask.multiprocessing.get, num_workers=4)
+        assert b2._in_memory
+        assert_equal(b2, expected)
+        # load
+        b3 = b.load(get=dask.multiprocessing.get, num_workers=4)
+        assert b3._in_memory
+        assert_equal(b3, expected)
+        # persist
+        b4 = b.persist(get=dask.multiprocessing.get, num_workers=4)
+        assert b4._in_memory
+        assert_equal(b4, expected)


Comment (Contributor): Redundant with the test below?

Comment (Member Author): Fixed: these should have been Variables.
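
Presumably the fix rewrites this first test in terms of ``Variable`` objects, along these lines (a sketch under that assumption, not the committed code; ``Variable`` gains no ``persist`` method in this PR, so only ``compute`` and ``load`` are exercised):

    def test_compute_args(self):
        a = Variable('x', [1, 2]).chunk()
        expected = Variable('x', [1, 4])
        b = a * a
        b1 = b.compute(get=dask.multiprocessing.get, num_workers=4)
        assert b1._in_memory
        assert_equal(b1, expected)
        b2 = b.load(get=dask.multiprocessing.get, num_workers=4)
        assert b2._in_memory
        assert_equal(b2, expected)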


 @requires_dask
 class TestDataArrayAndDataset(DaskTestCase):

@@ -393,6 +414,32 @@ def test_from_dask_variable(self):
                            coords={'x': range(4)}, name='foo')
         self.assertLazyAndIdentical(self.lazy_array, a)

+    def test_compute_args(self):
+        a = DataArray([1, 2], name='a').chunk()
+        expected = DataArray([1, 4], name='expected')
+        b = a * a
+        # compute
+        b1 = b.compute(get=dask.multiprocessing.get)
+        assert b1._in_memory
+        assert_equal(b1, expected)
+        b2 = b.compute(get=dask.multiprocessing.get, num_workers=4)
+        assert b2._in_memory
+        assert_equal(b2, expected)
+        # load
+        b3 = b.load(get=dask.multiprocessing.get, num_workers=4)
+        assert b3._in_memory
+        assert_equal(b3, expected)
+        # persist
+        b4 = b.persist(get=dask.multiprocessing.get, num_workers=4)
+        assert b4._in_memory
+        assert_equal(b4, expected)
+
+        # dataset
+        ds = a.to_dataset()
+        ds.compute(get=dask.multiprocessing.get, num_workers=4)
+        ds.load(get=dask.multiprocessing.get, num_workers=4)
+        ds.persist(get=dask.multiprocessing.get, num_workers=4)

 kernel_call_count = 0
 def kernel():

@@ -403,6 +450,7 @@ def kernel():
     kernel_call_count += 1
     return np.ones(1)
+

 def build_dask_array():
     global kernel_call_count
     kernel_call_count = 0

Comment (Contributor): Missing tests for Variable.