1 change: 1 addition & 0 deletions ci/requirements-py27-cdat+pynio.yml
@@ -16,6 +16,7 @@ dependencies:
 - pathlib2
 - pynio
 - pytest
+- mock
 - scipy
 - seaborn
 - toolz
1 change: 1 addition & 0 deletions ci/requirements-py27-min.yml
@@ -2,6 +2,7 @@ name: test_env
 dependencies:
 - python=2.7
 - pytest
+- mock
 - numpy==1.11
 - pandas==0.18.0
 - pip:
1 change: 1 addition & 0 deletions ci/requirements-py27-windows.yml
@@ -11,6 +11,7 @@ dependencies:
 - netcdf4
 - pathlib2
 - pytest
+- mock
 - numpy
 - pandas
 - scipy
1 change: 1 addition & 0 deletions ci/requirements-py34.yml
@@ -3,6 +3,7 @@ dependencies:
 - python=3.4
 - bottleneck
 - pytest
+- mock
 - pandas
 - pip:
   - coveralls
1 change: 1 addition & 0 deletions ci/requirements-py35.yml
@@ -10,6 +10,7 @@ dependencies:
 - matplotlib
 - netcdf4
 - pytest
+- mock
 - numpy
 - pandas
 - scipy
1 change: 1 addition & 0 deletions ci/requirements-py36-bottleneck-dev.yml
@@ -10,6 +10,7 @@ dependencies:
 - matplotlib
 - netcdf4
 - pytest
+- mock
 - numpy
 - pandas
 - scipy
1 change: 1 addition & 0 deletions ci/requirements-py36-condaforge-rc.yml
@@ -11,6 +11,7 @@ dependencies:
 - matplotlib
 - netcdf4
 - pytest
+- mock
 - numpy
 - pandas
 - seaborn
1 change: 1 addition & 0 deletions ci/requirements-py36-dask-dev.yml
@@ -8,6 +8,7 @@ dependencies:
 - matplotlib
 - netcdf4
 - pytest
+- mock
 - numpy
 - pandas
 - seaborn
1 change: 1 addition & 0 deletions ci/requirements-py36-netcdf4-dev.yml
@@ -10,6 +10,7 @@ dependencies:
 - h5netcdf
 - matplotlib
 - pytest
+- mock
 - numpy
 - pandas
 - scipy
1 change: 1 addition & 0 deletions ci/requirements-py36-pandas-dev.yml
@@ -11,6 +11,7 @@ dependencies:
 - matplotlib
 - netcdf4
 - pytest
+- mock
 - numpy
 - scipy
 - toolz
1 change: 1 addition & 0 deletions ci/requirements-py36-windows.yml
@@ -10,6 +10,7 @@ dependencies:
 - matplotlib
 - netcdf4
 - pytest
+- mock
 - numpy
 - pandas
 - scipy
1 change: 1 addition & 0 deletions ci/requirements-py36.yml
@@ -10,6 +10,7 @@ dependencies:
 - matplotlib
 - netcdf4
 - pytest
+- mock
 - numpy
 - pandas
 - scipy
4 changes: 4 additions & 0 deletions doc/whats-new.rst
@@ -97,6 +97,10 @@ Enhancements
   other means (:issue:`1459`).
   By `Ryan May <https://github.com/dopplershift>`_.
 
+- Support passing keyword arguments to ``load``, ``compute``, and ``persist``
+  methods. Any keyword arguments supplied to these methods are passed on to
+  the corresponding dask function (:issue:`1523`).
+  By `Joe Hamman <https://github.com/jhamman>`_.
 - Encoding attributes are now preserved when xarray objects are concatenated.
   The encoding is copied from the first object (:issue:`1297`).
   By `Joe Hamman <https://github.com/jhamman>`_ and
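
A minimal usage sketch of the behaviour this entry describes (not part of the diff; the file name is a placeholder, and which keywords are accepted depends on the dask scheduler in use):

    import xarray as xr

    # Hypothetical example: keywords given to compute()/persist() are
    # forwarded verbatim to the corresponding dask function, so any option
    # the active scheduler understands (e.g. num_workers) can be passed.
    ds = xr.open_dataset("data.nc", chunks={"time": 10})
    eager = ds.compute(num_workers=4)  # forwarded to dask.array.compute
    lazy = ds.persist()                # forwarded to dask.persist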
39 changes: 33 additions & 6 deletions xarray/core/dataarray.py
@@ -565,22 +565,31 @@ def reset_coords(self, names=None, drop=False, inplace=False):
         dataset[self.name] = self.variable
         return dataset
 
-    def load(self):
+    def load(self, **kwargs):
         """Manually trigger loading of this array's data from disk or a
         remote source into memory and return this array.
 
         Normally, it should not be necessary to call this method in user code,
         because all xarray functions should either work on deferred data or
         load data automatically. However, this method can be necessary when
         working with many file objects on disk.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.array.compute``.
+
+        See Also
+        --------
+        dask.array.compute
         """
-        ds = self._to_temp_dataset().load()
+        ds = self._to_temp_dataset().load(**kwargs)
         new = self._from_temp_dataset(ds)
         self._variable = new._variable
         self._coords = new._coords
         return self
 
-    def compute(self):
+    def compute(self, **kwargs):
         """Manually trigger loading of this array's data from disk or a
         remote source into memory and return a new array. The original is
         left unaltered.
@@ -589,18 +598,36 @@ def compute(self):
         because all xarray functions should either work on deferred data or
         load data automatically. However, this method can be necessary when
         working with many file objects on disk.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.array.compute``.
+
+        See Also
+        --------
+        dask.array.compute
         """
         new = self.copy(deep=False)
-        return new.load()
+        return new.load(**kwargs)
 
-    def persist(self):
+    def persist(self, **kwargs):
         """ Trigger computation in constituent dask arrays
 
         This keeps them as dask arrays but encourages them to keep data in
         memory. This is particularly useful when on a distributed machine.
         When on a single machine consider using ``.compute()`` instead.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.persist``.
+
+        See Also
+        --------
+        dask.persist
         """
-        ds = self._to_temp_dataset().persist()
+        ds = self._to_temp_dataset().persist(**kwargs)
         return self._from_temp_dataset(ds)
 
     def copy(self, deep=True):
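
A short sketch of the load/compute split implemented above (illustrative; the array values are made up):

    import numpy as np
    import xarray as xr

    # compute() returns a new in-memory copy and leaves the original lazy,
    # while load() loads the original in place; keyword arguments to either
    # are forwarded to dask.array.compute.
    lazy = xr.DataArray(np.arange(6).reshape(2, 3)).chunk()
    eager = lazy.compute()  # `lazy` still wraps a dask array afterwards
    lazy.load()             # `lazy` itself now holds a plain numpy array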
43 changes: 35 additions & 8 deletions xarray/core/dataset.py
@@ -445,14 +445,23 @@ def sizes(self):
         """
         return self.dims
 
-    def load(self):
+    def load(self, **kwargs):
         """Manually trigger loading of this dataset's data from disk or a
         remote source into memory and return this dataset.
 
         Normally, it should not be necessary to call this method in user code,
         because all xarray functions should either work on deferred data or
         load data automatically. However, this method can be necessary when
         working with many file objects on disk.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.array.compute``.
+
+        See Also
+        --------
+        dask.array.compute
         """
         # access .data to coerce everything to numpy or dask arrays
         lazy_data = {k: v._data for k, v in self.variables.items()
@@ -461,7 +470,7 @@ def load(self):
             import dask.array as da
 
             # evaluate all the dask arrays simultaneously
-            evaluated_data = da.compute(*lazy_data.values())
+            evaluated_data = da.compute(*lazy_data.values(), **kwargs)
 
             for k, data in zip(lazy_data, evaluated_data):
                 self.variables[k].data = data
@@ -473,7 +482,7 @@
 
         return self
 
-    def compute(self):
+    def compute(self, **kwargs):
         """Manually trigger loading of this dataset's data from disk or a
         remote source into memory and return a new dataset. The original is
         left unaltered.
@@ -482,11 +491,20 @@ def compute(self):
         because all xarray functions should either work on deferred data or
         load data automatically. However, this method can be necessary when
         working with many file objects on disk.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.array.compute``.
+
+        See Also
+        --------
+        dask.array.compute
         """
         new = self.copy(deep=False)
-        return new.load()
+        return new.load(**kwargs)
 
-    def _persist_inplace(self):
+    def _persist_inplace(self, **kwargs):
         """ Persist all Dask arrays in memory """
         # access .data to coerce everything to numpy or dask arrays
         lazy_data = {k: v._data for k, v in self.variables.items()
@@ -495,24 +513,33 @@ def _persist_inplace(self):
             import dask
 
             # evaluate all the dask arrays simultaneously
-            evaluated_data = dask.persist(*lazy_data.values())
+            evaluated_data = dask.persist(*lazy_data.values(), **kwargs)
 
             for k, data in zip(lazy_data, evaluated_data):
                 self.variables[k].data = data
 
         return self
 
-    def persist(self):
+    def persist(self, **kwargs):
         """ Trigger computation, keeping data as dask arrays
 
         This operation can be used to trigger computation on underlying dask
         arrays, similar to ``.compute()``. However this operation keeps the
         data as dask arrays. This is particularly useful when using the
         dask.distributed scheduler and you want to load a large amount of data
         into distributed memory.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.persist``.
+
+        See Also
+        --------
+        dask.persist
         """
         new = self.copy(deep=False)
-        return new._persist_inplace()
+        return new._persist_inplace(**kwargs)
 
     @classmethod
     def _construct_direct(cls, variables, coord_names, dims=None, attrs=None,
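
For context, a self-contained sketch of the pattern Dataset.load and _persist_inplace use (the names here are hypothetical, not xarray internals): collect every dask-backed value, evaluate all of them in one dask call so intermediate results can be shared, then write the concrete results back by key.

    import dask
    import dask.array as da

    data = {
        "a": da.ones((1000,), chunks=100),
        "b": da.zeros((1000,), chunks=100),
        "c": 42,  # non-dask values pass through untouched
    }
    lazy = {k: v for k, v in data.items() if isinstance(v, da.Array)}
    # evaluate all the lazy values at once; extra keyword arguments would be
    # forwarded here, exactly like the **kwargs in the diff above
    computed = dask.compute(*lazy.values())
    for k, arr in zip(lazy, computed):
        data[k] = arr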
28 changes: 24 additions & 4 deletions xarray/core/variable.py
@@ -307,29 +307,49 @@ def data(self, data):
     def _indexable_data(self):
         return orthogonally_indexable(self._data)
 
-    def load(self):
+    def load(self, **kwargs):
         """Manually trigger loading of this variable's data from disk or a
         remote source into memory and return this variable.
 
         Normally, it should not be necessary to call this method in user code,
         because all xarray functions should either work on deferred data or
         load data automatically.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.array.compute``.
+
+        See Also
+        --------
+        dask.array.compute
         """
-        if not isinstance(self._data, np.ndarray):
+        if isinstance(self._data, dask_array_type):
+            self._data = np.asarray(self._data.compute(**kwargs))

Contributor:
You don't want to invoke asarray if dask returns a scalar numpy type.

Member:
To be honest, we don't define what can go in _data as carefully as we ought to. I guess there are two ways to define it:

  • anything "array like" that defines at least shape, dtype and __getitem__
  • what comes out of xarray.core.variable.as_compatible_data

Numpy scalars do actually pass through here (since they define all those attributes!)... but then would get converted into an array when calling .values anyways:

    @property
    def values(self):
        """The variable's data as a numpy.ndarray"""
        return _as_array_or_item(self._data)

So I guess I agree, but on the other hand I'm also a little nervous that a dask routine might return a non-numpy scalar, which would definitely break if we don't wrap it in asarray. The safe thing to do is to leave this as is or call as_compatible_data on it.

+        elif not isinstance(self._data, np.ndarray):

Contributor:
This clause should be removed as it causes inconsistent behaviour with numpy scalar types. I cannot think of any other use case where _data is neither a dask array nor a numpy ndarray.

Member Author:
I think this allows for on-disk type arrays. @shoyer, any thoughts on calling np.asarray here and in the line above?

Member:
Yes, we need this to support on-disk arrays that aren't backed by dask. (I'd love to get rid of this in favor of always using dask, but dask has some limitations that make this tricky.)

             self._data = np.asarray(self._data)
         return self
 
-    def compute(self):
+    def compute(self, **kwargs):
         """Manually trigger loading of this variable's data from disk or a
         remote source into memory and return a new variable. The original is
         left unaltered.
 
         Normally, it should not be necessary to call this method in user code,
         because all xarray functions should either work on deferred data or
         load data automatically.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``dask.array.compute``.
+
+        See Also
+        --------
+        dask.array.compute
         """
         new = self.copy(deep=False)
-        return new.load()
+        return new.load(**kwargs)
 
     @property
     def values(self):
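
A small illustration of the scalar question debated in the review thread above (the exact return type can vary across dask versions, so treat this as a sketch):

    import numpy as np
    import dask.array as da

    # A full reduction of a dask array may compute to a numpy scalar rather
    # than an ndarray; np.asarray then wraps it into a 0-d array, which is
    # why the diff keeps the asarray call around .compute().
    result = da.ones((10,), chunks=5).sum().compute()
    wrapped = np.asarray(result)
    print(type(result), wrapped.ndim)  # e.g. <class 'numpy.float64'> 0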