From 7f0a3b87622afb26af97ee90817212a01224cb61 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Thu, 31 Aug 2017 13:21:24 -0700 Subject: [PATCH 1/9] pass dask compute/persist args through from load/compute/persist --- xarray/core/dataarray.py | 39 +++++++++++++++++++++++++----- xarray/core/dataset.py | 43 ++++++++++++++++++++++++++------- xarray/core/variable.py | 28 ++++++++++++++++++---- xarray/tests/test_dask.py | 50 ++++++++++++++++++++++++++++++++++++++- 4 files changed, 141 insertions(+), 19 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3f0fa85ba10..eef5ddb5f1a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -565,7 +565,7 @@ def reset_coords(self, names=None, drop=False, inplace=False): dataset[self.name] = self.variable return dataset - def load(self): + def load(self, **kwargs): """Manually trigger loading of this array's data from disk or a remote source into memory and return this array. @@ -573,14 +573,23 @@ def load(self): because all xarray functions should either work on deferred data or load data automatically. However, this method can be necessary when working with many file objects on disk. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.array.compute``. + + See Also + -------- + dask.array.compute """ - ds = self._to_temp_dataset().load() + ds = self._to_temp_dataset().load(**kwargs) new = self._from_temp_dataset(ds) self._variable = new._variable self._coords = new._coords return self - def compute(self): + def compute(self, **kwargs): """Manually trigger loading of this array's data from disk or a remote source into memory and return a new array. The original is left unaltered. @@ -589,18 +598,36 @@ def compute(self): because all xarray functions should either work on deferred data or load data automatically. However, this method can be necessary when working with many file objects on disk. 
+ + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.array.compute``. + + See Also + -------- + dask.array.compute """ new = self.copy(deep=False) - return new.load() + return new.load(**kwargs) - def persist(self): + def persist(self, **kwargs): """ Trigger computation in constituent dask arrays This keeps them as dask arrays but encourages them to keep data in memory. This is particularly useful when on a distributed machine. When on a single machine consider using ``.compute()`` instead. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.persist``. + + See Also + -------- + dask.persist """ - ds = self._to_temp_dataset().persist() + ds = self._to_temp_dataset().persist(**kwargs) return self._from_temp_dataset(ds) def copy(self, deep=True): diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index bffdbf10724..80e02ebc0ce 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -445,7 +445,7 @@ def sizes(self): """ return self.dims - def load(self): + def load(self, **kwargs): """Manually trigger loading of this dataset's data from disk or a remote source into memory and return this dataset. @@ -453,6 +453,15 @@ def load(self): because all xarray functions should either work on deferred data or load data automatically. However, this method can be necessary when working with many file objects on disk. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.array.compute``. 
+ + See Also + -------- + dask.array.compute """ # access .data to coerce everything to numpy or dask arrays lazy_data = {k: v._data for k, v in self.variables.items() @@ -461,7 +470,7 @@ def load(self): import dask.array as da # evaluate all the dask arrays simultaneously - evaluated_data = da.compute(*lazy_data.values()) + evaluated_data = da.compute(*lazy_data.values(), **kwargs) for k, data in zip(lazy_data, evaluated_data): self.variables[k].data = data @@ -473,7 +482,7 @@ def load(self): return self - def compute(self): + def compute(self, **kwargs): """Manually trigger loading of this dataset's data from disk or a remote source into memory and return a new dataset. The original is left unaltered. @@ -482,11 +491,20 @@ def compute(self): because all xarray functions should either work on deferred data or load data automatically. However, this method can be necessary when working with many file objects on disk. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.array.compute``. + + See Also + -------- + dask.array.compute """ new = self.copy(deep=False) - return new.load() + return new.load(**kwargs) - def _persist_inplace(self): + def _persist_inplace(self, **kwargs): """ Persist all Dask arrays in memory """ # access .data to coerce everything to numpy or dask arrays lazy_data = {k: v._data for k, v in self.variables.items() @@ -495,14 +513,14 @@ def _persist_inplace(self): import dask # evaluate all the dask arrays simultaneously - evaluated_data = dask.persist(*lazy_data.values()) + evaluated_data = dask.persist(*lazy_data.values(), **kwargs) for k, data in zip(lazy_data, evaluated_data): self.variables[k].data = data return self - def persist(self): + def persist(self, **kwargs): """ Trigger computation, keeping data as dask arrays This operation can be used to trigger computation on underlying dask @@ -510,9 +528,18 @@ def persist(self): data as dask arrays. 
This is particularly useful when using the dask.distributed scheduler and you want to load a large amount of data into distributed memory. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.persist``. + + See Also + -------- + dask.persist """ new = self.copy(deep=False) - return new._persist_inplace() + return new._persist_inplace(**kwargs) @classmethod def _construct_direct(cls, variables, coord_names, dims=None, attrs=None, diff --git a/xarray/core/variable.py b/xarray/core/variable.py index b02882bc2ac..16823aaa001 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -307,19 +307,30 @@ def data(self, data): def _indexable_data(self): return orthogonally_indexable(self._data) - def load(self): + def load(self, **kwargs): """Manually trigger loading of this variable's data from disk or a remote source into memory and return this variable. Normally, it should not be necessary to call this method in user code, because all xarray functions should either work on deferred data or load data automatically. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.array.compute``. + + See Also + -------- + dask.array.compute """ - if not isinstance(self._data, np.ndarray): + if isinstance(self._data, dask_array_type): + self._data = np.asarray(self._data.compute(**kwargs)) + elif not isinstance(self._data, np.ndarray): self._data = np.asarray(self._data) return self - def compute(self): + def compute(self, **kwargs): """Manually trigger loading of this variable's data from disk or a remote source into memory and return a new variable. The original is left unaltered. @@ -327,9 +338,18 @@ def compute(self): Normally, it should not be necessary to call this method in user code, because all xarray functions should either work on deferred data or load data automatically. 
+ + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.array.compute``. + + See Also + -------- + dask.array.compute """ new = self.copy(deep=False) - return new.load() + return new.load(**kwargs) @property def values(self): diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 92f616b8bd6..5878fe36646 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -11,11 +11,12 @@ from xarray.core.pycompat import suppress from . import TestCase, requires_dask -from xarray.tests import unittest +from xarray.tests import unittest, assert_equal with suppress(ImportError): import dask import dask.array as da + import dask.multiprocessing class DaskTestCase(TestCase): @@ -182,6 +183,26 @@ def test_bivariate_ufunc(self): self.assertLazyAndAllClose(np.maximum(u, 0), xu.maximum(v, 0)) self.assertLazyAndAllClose(np.maximum(u, 0), xu.maximum(0, v)) + def test_compute_args(self): + a = DataArray([1, 2]).chunk() + expected = DataArray([1, 4]) + b = a * a + # compute + b1 = b.compute(get=dask.multiprocessing.get) + assert b1._in_memory + assert_equal(b1, expected) + b2 = b.compute(get=dask.multiprocessing.get, num_workers=4) + assert b2._in_memory + assert_equal(b2, expected) + # load + b3 = b.load(get=dask.multiprocessing.get, num_workers=4) + assert b3._in_memory + assert_equal(b3, expected) + # persist + b4 = b.persist(get=dask.multiprocessing.get, num_workers=4) + assert b4._in_memory + assert_equal(b4, expected) + @requires_dask class TestDataArrayAndDataset(DaskTestCase): @@ -393,6 +414,32 @@ def test_from_dask_variable(self): coords={'x': range(4)}, name='foo') self.assertLazyAndIdentical(self.lazy_array, a) + def test_compute_args(self): + a = DataArray([1, 2], name='a').chunk() + expected = DataArray([1, 4], name='expected') + b = a * a + # compute + b1 = b.compute(get=dask.multiprocessing.get) + assert b1._in_memory + assert_equal(b1, expected) + b2 = b.compute(get=dask.multiprocessing.get, 
num_workers=4) + assert b2._in_memory + assert_equal(b2, expected) + # load + b3 = b.load(get=dask.multiprocessing.get, num_workers=4) + assert b3._in_memory + assert_equal(b3, expected) + # persist + b4 = b.persist(get=dask.multiprocessing.get, num_workers=4) + assert b4._in_memory + assert_equal(b4, expected) + + # dataset + ds = a.to_dataset() + ds.compute(get=dask.multiprocessing.get, num_workers=4) + ds.load(get=dask.multiprocessing.get, num_workers=4) + ds.persist(get=dask.multiprocessing.get, num_workers=4) + kernel_call_count = 0 def kernel(): @@ -403,6 +450,7 @@ def kernel(): kernel_call_count += 1 return np.ones(1) + def build_dask_array(): global kernel_call_count kernel_call_count = 0 From df4f5d96cc50329756c8eb7bee9bb7026044044b Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Fri, 1 Sep 2017 08:18:56 -0700 Subject: [PATCH 2/9] fix test and whatsnew note --- doc/whats-new.rst | 5 +++++ xarray/tests/test_dask.py | 8 ++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d74ebc05391..c13d0e0c720 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -75,6 +75,11 @@ Enhancements other means (:issue:`1459`). By `Ryan May `_. + - Support passing keyword arguments to ``load``, ``compute``, and ``persist`` + methods. Any keyword arguments supplied to these methods are passed on to + the corresponding dask function (:issue:`1523`). + By `Joe Hamman `_. 
+ Bug fixes ~~~~~~~~~ diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 5878fe36646..329c5ef12c8 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -184,8 +184,8 @@ def test_bivariate_ufunc(self): self.assertLazyAndAllClose(np.maximum(u, 0), xu.maximum(0, v)) def test_compute_args(self): - a = DataArray([1, 2]).chunk() - expected = DataArray([1, 4]) + a = Variable('x', [1, 2]).chunk() + expected = Variable('x', [1, 4]) b = a * a # compute b1 = b.compute(get=dask.multiprocessing.get) @@ -198,10 +198,6 @@ def test_compute_args(self): b3 = b.load(get=dask.multiprocessing.get, num_workers=4) assert b3._in_memory assert_equal(b3, expected) - # persist - b4 = b.persist(get=dask.multiprocessing.get, num_workers=4) - assert b4._in_memory - assert_equal(b4, expected) @requires_dask From 490784ab2aff81ce5d7d36096f838fa62eed7613 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 5 Sep 2017 09:41:00 -0700 Subject: [PATCH 3/9] test dask compute args with mock --- ci/requirements-py27-cdat+pynio.yml | 1 + ci/requirements-py27-min.yml | 1 + ci/requirements-py27-windows.yml | 1 + ci/requirements-py34.yml | 1 + ci/requirements-py35.yml | 1 + ci/requirements-py36-bottleneck-dev.yml | 1 + ci/requirements-py36-condaforge-rc.yml | 1 + ci/requirements-py36-dask-dev.yml | 1 + ci/requirements-py36-netcdf4-dev.yml | 1 + ci/requirements-py36-pandas-dev.yml | 1 + ci/requirements-py36-windows.yml | 1 + ci/requirements-py36.yml | 1 + xarray/tests/test_dask.py | 95 ++++++++++++++----------- 13 files changed, 66 insertions(+), 41 deletions(-) diff --git a/ci/requirements-py27-cdat+pynio.yml b/ci/requirements-py27-cdat+pynio.yml index 0258c8c9672..ccd3fbf9cb4 100644 --- a/ci/requirements-py27-cdat+pynio.yml +++ b/ci/requirements-py27-cdat+pynio.yml @@ -16,6 +16,7 @@ dependencies: - pathlib2 - pynio - pytest + - mock - scipy - seaborn - toolz diff --git a/ci/requirements-py27-min.yml b/ci/requirements-py27-min.yml index 9c7d7c5a9e9..6f63315db67 
100644 --- a/ci/requirements-py27-min.yml +++ b/ci/requirements-py27-min.yml @@ -2,6 +2,7 @@ name: test_env dependencies: - python=2.7 - pytest + - mock - numpy==1.11 - pandas==0.18.0 - pip: diff --git a/ci/requirements-py27-windows.yml b/ci/requirements-py27-windows.yml index e953b5ffdcb..73baca68dfa 100644 --- a/ci/requirements-py27-windows.yml +++ b/ci/requirements-py27-windows.yml @@ -11,6 +11,7 @@ dependencies: - netcdf4 - pathlib2 - pytest + - mock - numpy - pandas - scipy diff --git a/ci/requirements-py34.yml b/ci/requirements-py34.yml index a49611751ca..e77ec805c26 100644 --- a/ci/requirements-py34.yml +++ b/ci/requirements-py34.yml @@ -3,6 +3,7 @@ dependencies: - python=3.4 - bottleneck - pytest + - mock - pandas - pip: - coveralls diff --git a/ci/requirements-py35.yml b/ci/requirements-py35.yml index 1c7a4558c91..ae41ded57ce 100644 --- a/ci/requirements-py35.yml +++ b/ci/requirements-py35.yml @@ -10,6 +10,7 @@ dependencies: - matplotlib - netcdf4 - pytest + - mock - numpy - pandas - scipy diff --git a/ci/requirements-py36-bottleneck-dev.yml b/ci/requirements-py36-bottleneck-dev.yml index 8e2fd98cfcf..913f312c2c7 100644 --- a/ci/requirements-py36-bottleneck-dev.yml +++ b/ci/requirements-py36-bottleneck-dev.yml @@ -10,6 +10,7 @@ dependencies: - matplotlib - netcdf4 - pytest + - mock - numpy - pandas - scipy diff --git a/ci/requirements-py36-condaforge-rc.yml b/ci/requirements-py36-condaforge-rc.yml index 8426ca2df42..940cf41e1c9 100644 --- a/ci/requirements-py36-condaforge-rc.yml +++ b/ci/requirements-py36-condaforge-rc.yml @@ -11,6 +11,7 @@ dependencies: - matplotlib - netcdf4 - pytest + - mock - numpy - pandas - seaborn diff --git a/ci/requirements-py36-dask-dev.yml b/ci/requirements-py36-dask-dev.yml index 8606e15d614..cde30e0b55f 100644 --- a/ci/requirements-py36-dask-dev.yml +++ b/ci/requirements-py36-dask-dev.yml @@ -8,6 +8,7 @@ dependencies: - matplotlib - netcdf4 - pytest + - mock - numpy - pandas - seaborn diff --git 
a/ci/requirements-py36-netcdf4-dev.yml b/ci/requirements-py36-netcdf4-dev.yml index 033d1f41b4d..3e0467362fc 100644 --- a/ci/requirements-py36-netcdf4-dev.yml +++ b/ci/requirements-py36-netcdf4-dev.yml @@ -10,6 +10,7 @@ dependencies: - h5netcdf - matplotlib - pytest + - mock - numpy - pandas - scipy diff --git a/ci/requirements-py36-pandas-dev.yml b/ci/requirements-py36-pandas-dev.yml index ebcec868f76..7758cdcf680 100644 --- a/ci/requirements-py36-pandas-dev.yml +++ b/ci/requirements-py36-pandas-dev.yml @@ -11,6 +11,7 @@ dependencies: - matplotlib - netcdf4 - pytest + - mock - numpy - scipy - toolz diff --git a/ci/requirements-py36-windows.yml b/ci/requirements-py36-windows.yml index 70ff3e50a1b..1ba2674eedf 100644 --- a/ci/requirements-py36-windows.yml +++ b/ci/requirements-py36-windows.yml @@ -10,6 +10,7 @@ dependencies: - matplotlib - netcdf4 - pytest + - mock - numpy - pandas - scipy diff --git a/ci/requirements-py36.yml b/ci/requirements-py36.yml index 3022c1a0886..a8117ce43e3 100644 --- a/ci/requirements-py36.yml +++ b/ci/requirements-py36.yml @@ -10,6 +10,7 @@ dependencies: - matplotlib - netcdf4 - pytest + - mock - numpy - pandas - scipy diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 329c5ef12c8..b3c06ebe61c 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -4,6 +4,8 @@ import pickle import numpy as np import pandas as pd +import pytest +import mock import xarray as xr from xarray import Variable, DataArray, Dataset @@ -183,22 +185,6 @@ def test_bivariate_ufunc(self): self.assertLazyAndAllClose(np.maximum(u, 0), xu.maximum(v, 0)) self.assertLazyAndAllClose(np.maximum(u, 0), xu.maximum(0, v)) - def test_compute_args(self): - a = Variable('x', [1, 2]).chunk() - expected = Variable('x', [1, 4]) - b = a * a - # compute - b1 = b.compute(get=dask.multiprocessing.get) - assert b1._in_memory - assert_equal(b1, expected) - b2 = b.compute(get=dask.multiprocessing.get, num_workers=4) - assert b2._in_memory - 
assert_equal(b2, expected) - # load - b3 = b.load(get=dask.multiprocessing.get, num_workers=4) - assert b3._in_memory - assert_equal(b3, expected) - @requires_dask class TestDataArrayAndDataset(DaskTestCase): @@ -410,31 +396,58 @@ def test_from_dask_variable(self): coords={'x': range(4)}, name='foo') self.assertLazyAndIdentical(self.lazy_array, a) - def test_compute_args(self): - a = DataArray([1, 2], name='a').chunk() - expected = DataArray([1, 4], name='expected') - b = a * a - # compute - b1 = b.compute(get=dask.multiprocessing.get) - assert b1._in_memory - assert_equal(b1, expected) - b2 = b.compute(get=dask.multiprocessing.get, num_workers=4) - assert b2._in_memory - assert_equal(b2, expected) - # load - b3 = b.load(get=dask.multiprocessing.get, num_workers=4) - assert b3._in_memory - assert_equal(b3, expected) - # persist - b4 = b.persist(get=dask.multiprocessing.get, num_workers=4) - assert b4._in_memory - assert_equal(b4, expected) - - # dataset - ds = a.to_dataset() - ds.compute(get=dask.multiprocessing.get, num_workers=4) - ds.load(get=dask.multiprocessing.get, num_workers=4) - ds.persist(get=dask.multiprocessing.get, num_workers=4) + +@pytest.mark.parametrize("method", ['load', 'compute']) +def test_dask_kwargs_variable(method): + x = Variable('y', da.from_array(np.arange(3), chunks=(2,))) + with mock.patch.object(Variable, method, + return_value=np.arange(3)) as mock_method: + getattr(x, method)(foo='bar') + mock_method.assert_called_with(foo='bar') + + # args should be passed on to da.Array.compute() + with mock.patch.object(da.Array, 'compute', + return_value=np.arange(3)) as mock_compute: + getattr(x, method)(foo='bar') + mock_compute.assert_called_with(foo='bar') + + +@pytest.mark.parametrize("method", ['load', 'compute', 'persist']) +def test_dask_kwargs_dataarray(method): + data = da.from_array(np.arange(3), chunks=(2,)) + x = DataArray(data) + with mock.patch.object(DataArray, method, + return_value=np.arange(3)) as mock_method: + getattr(x, 
method)(foo='bar') + mock_method.assert_called_with(foo='bar') + + if method in ['load', 'compute']: + dask_func = 'dask.array.compute' + else: + dask_func = 'dask.persist' + # args should be passed on to "dask_func" + with mock.patch(dask_func) as mock_func: + getattr(x, method)(foo='bar') + mock_func.assert_called_with(data, foo='bar') + + +@pytest.mark.parametrize("method", ['load', 'compute', 'persist']) +def test_dask_kwargs_dataset(method): + data = da.from_array(np.arange(3), chunks=(2,)) + x = Dataset({'x': (('y'), data)}) + with mock.patch.object(Dataset, method, + return_value=np.arange(3)) as mock_method: + getattr(x, method)(foo='bar') + mock_method.assert_called_with(foo='bar') + + if method in ['load', 'compute']: + dask_func = 'dask.array.compute' + else: + dask_func = 'dask.persist' + # args should be passed on to "dask_func" + with mock.patch(dask_func) as mock_func: + getattr(x, method)(foo='bar') + mock_func.assert_called_with(data, foo='bar') kernel_call_count = 0 From a7af62a66ed629ba85bb7e35180b209a9f52ce1d Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 5 Sep 2017 09:56:22 -0700 Subject: [PATCH 4/9] use as_compatible_data instead of np.asarray --- xarray/core/variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 79af65a6459..dc8f3b39d2d 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -325,7 +325,7 @@ def load(self, **kwargs): dask.array.compute """ if isinstance(self._data, dask_array_type): - self._data = np.asarray(self._data.compute(**kwargs)) + self._data = as_compatible_data(self._data.compute(**kwargs)) elif not isinstance(self._data, np.ndarray): self._data = np.asarray(self._data) return self From 2b506c546698c3f90caa2f573fc174d05c8b7b30 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 5 Sep 2017 09:57:40 -0700 Subject: [PATCH 5/9] requires dask --- xarray/tests/test_dask.py | 3 ++- 1 file changed, 2 insertions(+), 1 
deletion(-) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index b3c06ebe61c..6cdedfd4bc3 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -412,6 +412,7 @@ def test_dask_kwargs_variable(method): mock_compute.assert_called_with(foo='bar') +@requires_dask @pytest.mark.parametrize("method", ['load', 'compute', 'persist']) def test_dask_kwargs_dataarray(method): data = da.from_array(np.arange(3), chunks=(2,)) @@ -430,7 +431,7 @@ def test_dask_kwargs_dataarray(method): getattr(x, method)(foo='bar') mock_func.assert_called_with(data, foo='bar') - +@requires_dask @pytest.mark.parametrize("method", ['load', 'compute', 'persist']) def test_dask_kwargs_dataset(method): data = da.from_array(np.arange(3), chunks=(2,)) From a2dbe2646069e7725823dbd7b3eb4100db744fa2 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 5 Sep 2017 10:00:03 -0700 Subject: [PATCH 6/9] requires dask --- xarray/tests/test_dask.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 6cdedfd4bc3..ca4e3fae6d8 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -397,6 +397,7 @@ def test_from_dask_variable(self): self.assertLazyAndIdentical(self.lazy_array, a) +@requires_dask @pytest.mark.parametrize("method", ['load', 'compute']) def test_dask_kwargs_variable(method): x = Variable('y', da.from_array(np.arange(3), chunks=(2,))) @@ -431,6 +432,7 @@ def test_dask_kwargs_dataarray(method): getattr(x, method)(foo='bar') mock_func.assert_called_with(data, foo='bar') + @requires_dask @pytest.mark.parametrize("method", ['load', 'compute', 'persist']) def test_dask_kwargs_dataset(method): From 6fd941fe9b5161907b8e24b143665550876324ef Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 5 Sep 2017 11:07:48 -0700 Subject: [PATCH 7/9] cleanup tests and update docs --- ci/requirements-py34.yml | 1 - ci/requirements-py35.yml | 1 - ci/requirements-py36-bottleneck-dev.yml | 1 - 
ci/requirements-py36-condaforge-rc.yml | 1 - ci/requirements-py36-dask-dev.yml | 1 - ci/requirements-py36-netcdf4-dev.yml | 1 - ci/requirements-py36-pandas-dev.yml | 1 - ci/requirements-py36-windows.yml | 1 - ci/requirements-py36.yml | 1 - doc/installing.rst | 10 ++++++++-- xarray/tests/__init__.py | 5 +++++ xarray/tests/test_dask.py | 18 +----------------- 12 files changed, 14 insertions(+), 28 deletions(-) diff --git a/ci/requirements-py34.yml b/ci/requirements-py34.yml index e77ec805c26..a49611751ca 100644 --- a/ci/requirements-py34.yml +++ b/ci/requirements-py34.yml @@ -3,7 +3,6 @@ dependencies: - python=3.4 - bottleneck - pytest - - mock - pandas - pip: - coveralls diff --git a/ci/requirements-py35.yml b/ci/requirements-py35.yml index ae41ded57ce..1c7a4558c91 100644 --- a/ci/requirements-py35.yml +++ b/ci/requirements-py35.yml @@ -10,7 +10,6 @@ dependencies: - matplotlib - netcdf4 - pytest - - mock - numpy - pandas - scipy diff --git a/ci/requirements-py36-bottleneck-dev.yml b/ci/requirements-py36-bottleneck-dev.yml index 913f312c2c7..8e2fd98cfcf 100644 --- a/ci/requirements-py36-bottleneck-dev.yml +++ b/ci/requirements-py36-bottleneck-dev.yml @@ -10,7 +10,6 @@ dependencies: - matplotlib - netcdf4 - pytest - - mock - numpy - pandas - scipy diff --git a/ci/requirements-py36-condaforge-rc.yml b/ci/requirements-py36-condaforge-rc.yml index 940cf41e1c9..8426ca2df42 100644 --- a/ci/requirements-py36-condaforge-rc.yml +++ b/ci/requirements-py36-condaforge-rc.yml @@ -11,7 +11,6 @@ dependencies: - matplotlib - netcdf4 - pytest - - mock - numpy - pandas - seaborn diff --git a/ci/requirements-py36-dask-dev.yml b/ci/requirements-py36-dask-dev.yml index cde30e0b55f..8606e15d614 100644 --- a/ci/requirements-py36-dask-dev.yml +++ b/ci/requirements-py36-dask-dev.yml @@ -8,7 +8,6 @@ dependencies: - matplotlib - netcdf4 - pytest - - mock - numpy - pandas - seaborn diff --git a/ci/requirements-py36-netcdf4-dev.yml b/ci/requirements-py36-netcdf4-dev.yml index 
3e0467362fc..033d1f41b4d 100644 --- a/ci/requirements-py36-netcdf4-dev.yml +++ b/ci/requirements-py36-netcdf4-dev.yml @@ -10,7 +10,6 @@ dependencies: - h5netcdf - matplotlib - pytest - - mock - numpy - pandas - scipy diff --git a/ci/requirements-py36-pandas-dev.yml b/ci/requirements-py36-pandas-dev.yml index 7758cdcf680..ebcec868f76 100644 --- a/ci/requirements-py36-pandas-dev.yml +++ b/ci/requirements-py36-pandas-dev.yml @@ -11,7 +11,6 @@ dependencies: - matplotlib - netcdf4 - pytest - - mock - numpy - scipy - toolz diff --git a/ci/requirements-py36-windows.yml b/ci/requirements-py36-windows.yml index 1ba2674eedf..70ff3e50a1b 100644 --- a/ci/requirements-py36-windows.yml +++ b/ci/requirements-py36-windows.yml @@ -10,7 +10,6 @@ dependencies: - matplotlib - netcdf4 - pytest - - mock - numpy - pandas - scipy diff --git a/ci/requirements-py36.yml b/ci/requirements-py36.yml index a8117ce43e3..3022c1a0886 100644 --- a/ci/requirements-py36.yml +++ b/ci/requirements-py36.yml @@ -10,7 +10,6 @@ dependencies: - matplotlib - netcdf4 - pytest - - mock - numpy - pandas - scipy diff --git a/doc/installing.rst b/doc/installing.rst index a316ef38fc5..522577a078b 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -73,6 +73,12 @@ pandas) installed first. Then, install xarray with pip:: $ pip install xarray -To run the test suite after installing xarray, install -`py.test `__ (``pip install pytest``) and run +Testing +------- + +To run the test suite after installing xarray, first install (via pypi or conda) +- `py.test `__: Simple unit testing library +- `mock `__: additional testing library required for python version 2 + +and run ``py.test --pyargs xarray``. 
diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 7afad6ffe92..05c4cd340cb 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -20,6 +20,11 @@ except ImportError: import unittest +try: + from unittest import mock +except ImportError: + import mock + try: import scipy has_scipy = True diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index ca4e3fae6d8..d35f9e3b782 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd import pytest -import mock import xarray as xr from xarray import Variable, DataArray, Dataset @@ -13,7 +12,7 @@ from xarray.core.pycompat import suppress from . import TestCase, requires_dask -from xarray.tests import unittest, assert_equal +from xarray.tests import unittest, assert_equal, mock with suppress(ImportError): import dask @@ -401,11 +400,6 @@ def test_from_dask_variable(self): @pytest.mark.parametrize("method", ['load', 'compute']) def test_dask_kwargs_variable(method): x = Variable('y', da.from_array(np.arange(3), chunks=(2,))) - with mock.patch.object(Variable, method, - return_value=np.arange(3)) as mock_method: - getattr(x, method)(foo='bar') - mock_method.assert_called_with(foo='bar') - # args should be passed on to da.Array.compute() with mock.patch.object(da.Array, 'compute', return_value=np.arange(3)) as mock_compute: @@ -418,11 +412,6 @@ def test_dask_kwargs_variable(method): def test_dask_kwargs_dataarray(method): data = da.from_array(np.arange(3), chunks=(2,)) x = DataArray(data) - with mock.patch.object(DataArray, method, - return_value=np.arange(3)) as mock_method: - getattr(x, method)(foo='bar') - mock_method.assert_called_with(foo='bar') - if method in ['load', 'compute']: dask_func = 'dask.array.compute' else: @@ -438,11 +427,6 @@ def test_dask_kwargs_dataarray(method): def test_dask_kwargs_dataset(method): data = da.from_array(np.arange(3), chunks=(2,)) x = Dataset({'x': (('y'), data)}) - with 
mock.patch.object(Dataset, method, - return_value=np.arange(3)) as mock_method: - getattr(x, method)(foo='bar') - mock_method.assert_called_with(foo='bar') - if method in ['load', 'compute']: dask_func = 'dask.array.compute' else: From b5cc3bb8125c6195d72f1fe9546cd45ac9006c54 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 5 Sep 2017 11:14:04 -0700 Subject: [PATCH 8/9] update setup.py --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 6ff8de60666..e157a825d07 100644 --- a/setup.py +++ b/setup.py @@ -37,6 +37,8 @@ INSTALL_REQUIRES = ['numpy >= 1.11', 'pandas >= 0.18.0'] TESTS_REQUIRE = ['pytest >= 2.7.1'] +if sys.version_info[0] < 3: + TESTS_REQUIRE.append('mock') DESCRIPTION = "N-D labeled arrays and datasets in Python" LONG_DESCRIPTION = """ From a879214ad6fe9b513c8f41e91ff190dd5098266f Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 5 Sep 2017 11:19:12 -0700 Subject: [PATCH 9/9] cleanup imports --- xarray/tests/test_dask.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index d35f9e3b782..422c34adfa3 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -12,12 +12,11 @@ from xarray.core.pycompat import suppress from . import TestCase, requires_dask -from xarray.tests import unittest, assert_equal, mock +from xarray.tests import unittest, mock with suppress(ImportError): import dask import dask.array as da - import dask.multiprocessing class DaskTestCase(TestCase):