diff --git a/doc/api.rst b/doc/api.rst index 927c0aa072c..ce15e8f9d49 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -150,6 +150,7 @@ Computation Dataset.resample Dataset.diff Dataset.quantile + Dataset.differentiate **Aggregation**: :py:attr:`~Dataset.all` @@ -317,6 +318,7 @@ Computation DataArray.diff DataArray.dot DataArray.quantile + DataArray.differentiate **Aggregation**: :py:attr:`~DataArray.all` diff --git a/doc/computation.rst b/doc/computation.rst index 6793e667e06..67cda6f2191 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -200,6 +200,29 @@ You can also use ``construct`` to compute a weighted rolling sum: To avoid this, use ``skipna=False`` as the above example. +Computation using Coordinates +============================= + +Xarray objects have some handy methods for the computation with their +coordinates. :py:meth:`~xarray.DataArray.differentiate` computes derivatives by +central finite differences using their coordinates, + +.. ipython:: python + a = xr.DataArray([0, 1, 2, 3], dims=['x'], coords=[0.1, 0.11, 0.2, 0.3]) + a + a.differentiate('x') + +This method can be used also for multidimensional arrays, + +.. ipython:: python + a = xr.DataArray(np.arange(8).reshape(4, 2), dims=['x', 'y'], + coords=[0.1, 0.11, 0.2, 0.3]) + a.differentiate('x') + +.. note:: + This method is limited to simple cartesian geometry. Differentiation along + multidimensional coordinate is not supported. + .. _compute.broadcasting: Broadcasting by dimension name diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 881ae52cdeb..82d315bdfc8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,6 +36,10 @@ Documentation Enhancements ~~~~~~~~~~~~ +- :py:meth:`~xarray.DataArray.differentiate` and + :py:meth:`~xarray.Dataset.differentiate` are newly added. + (:issue:`1332`) + By `Keisuke Fujii `_. - Default colormap for sequential and divergent data can now be set via :py:func:`~xarray.set_options()` (:issue:`2394`) diff --git a/xarray/__init__.py b/xarray/__init__.py index 7cc7811b783..27a3f6b1eff 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -10,7 +10,7 @@ from .core.alignment import align, broadcast, broadcast_arrays from .core.common import full_like, zeros_like, ones_like from .core.combine import concat, auto_combine -from .core.computation import apply_ufunc, where, dot +from .core.computation import apply_ufunc, dot, where from .core.extensions import (register_dataarray_accessor, register_dataset_accessor) from .core.variable import as_variable, Variable, IndexVariable, Coordinate diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index c2417345f55..5e6b81a253d 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -1,6 +1,9 @@ from __future__ import absolute_import, division, print_function +from distutils.version import LooseVersion + import numpy as np +from dask import __version__ as dask_version import dask.array as da try: @@ -30,3 +33,124 @@ def isin(element, test_elements, assume_unique=False, invert=False): if invert: result = ~result return result + + +if LooseVersion(dask_version) > LooseVersion('1.19.2'): + gradient = da.gradient + +else: # pragma: no cover + # Copied from dask v0.19.2 + # Used under the terms of Dask's license, see licenses/DASK_LICENSE. + import math + from numbers import Integral, Real + + AxisError = np.AxisError + + def validate_axis(axis, ndim): + """ Validate an input to axis= keywords """ + if isinstance(axis, (tuple, list)): + return tuple(validate_axis(ax, ndim) for ax in axis) + if not isinstance(axis, Integral): + raise TypeError("Axis value must be an integer, got %s" % axis) + if axis < -ndim or axis >= ndim: + raise AxisError("Axis %d is out of bounds for array of dimension " + "%d" % (axis, ndim)) + if axis < 0: + axis += ndim + return axis + + def _gradient_kernel(x, block_id, coord, axis, array_locs, grad_kwargs): + """ + x: nd-array + array of one block + coord: 1d-array or scalar + coordinate along which the gradient is computed. + axis: int + axis along which the gradient is computed + array_locs: + actual location along axis. None if coordinate is scalar + grad_kwargs: + keyword to be passed to np.gradient + """ + block_loc = block_id[axis] + if array_locs is not None: + coord = coord[array_locs[0][block_loc]:array_locs[1][block_loc]] + grad = np.gradient(x, coord, axis=axis, **grad_kwargs) + return grad + + def gradient(f, *varargs, **kwargs): + f = da.asarray(f) + + kwargs["edge_order"] = math.ceil(kwargs.get("edge_order", 1)) + if kwargs["edge_order"] > 2: + raise ValueError("edge_order must be less than or equal to 2.") + + drop_result_list = False + axis = kwargs.pop("axis", None) + if axis is None: + axis = tuple(range(f.ndim)) + elif isinstance(axis, Integral): + drop_result_list = True + axis = (axis,) + + axis = validate_axis(axis, f.ndim) + + if len(axis) != len(set(axis)): + raise ValueError("duplicate axes not allowed") + + axis = tuple(ax % f.ndim for ax in axis) + + if varargs == (): + varargs = (1,) + if len(varargs) == 1: + varargs = len(axis) * varargs + if len(varargs) != len(axis): + raise TypeError( + "Spacing must either be a single scalar, or a scalar / " + "1d-array per axis" + ) + + if issubclass(f.dtype.type, (np.bool8, Integral)): + f = f.astype(float) + elif issubclass(f.dtype.type, Real) and f.dtype.itemsize < 4: + f = f.astype(float) + + results = [] + for i, ax in enumerate(axis): + for c in f.chunks[ax]: + if np.min(c) < kwargs["edge_order"] + 1: + raise ValueError( + 'Chunk size must be larger than edge_order + 1. ' + 'Minimum chunk for aixs {} is {}. Rechunk to ' + 'proceed.'.format(np.min(c), ax)) + + if np.isscalar(varargs[i]): + array_locs = None + else: + if isinstance(varargs[i], da.Array): + raise NotImplementedError( + 'dask array coordinated is not supported.') + # coordinate position for each block taking overlap into + # account + chunk = np.array(f.chunks[ax]) + array_loc_stop = np.cumsum(chunk) + 1 + array_loc_start = array_loc_stop - chunk - 2 + array_loc_stop[-1] -= 1 + array_loc_start[0] = 0 + array_locs = (array_loc_start, array_loc_stop) + + results.append(f.map_overlap( + _gradient_kernel, + dtype=f.dtype, + depth={j: 1 if j == ax else 0 for j in range(f.ndim)}, + boundary="none", + coord=varargs[i], + axis=ax, + array_locs=array_locs, + grad_kwargs=kwargs, + )) + + if drop_result_list: + results = results[0] + + return results diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index ae3758f0bbd..03ea496658f 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2010,6 +2010,9 @@ def diff(self, dim, n=1, label='upper'): Coordinates: * x (x) int64 3 4 + See Also + -------- + DataArray.differentiate """ ds = self._to_temp_dataset().diff(n=n, dim=dim, label=label) return self._from_temp_dataset(ds) @@ -2289,6 +2292,61 @@ def rank(self, dim, pct=False, keep_attrs=False): ds = self._to_temp_dataset().rank(dim, pct=pct, keep_attrs=keep_attrs) return self._from_temp_dataset(ds) + def differentiate(self, coord, edge_order=1, datetime_unit=None): + """ Differentiate the array with the second order accurate central + differences. + + .. note:: + This feature is limited to simple cartesian geometry, i.e. coord + must be one dimensional. + + Parameters + ---------- + coord: str + The coordinate to be used to compute the gradient. + edge_order: 1 or 2. Default 1 + N-th order accurate differences at the boundaries. + datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', + 'us', 'ns', 'ps', 'fs', 'as'} + Unit to compute gradient. Only valid for datetime coordinate. + + Returns + ------- + differentiated: DataArray + + See also + -------- + numpy.gradient: corresponding numpy function + + Examples + -------- + + >>> da = xr.DataArray(np.arange(12).reshape(4, 3), dims=['x', 'y'], + ... coords={'x': [0, 0.1, 1.1, 1.2]}) + >>> da + + array([[ 0, 1, 2], + [ 3, 4, 5], + [ 6, 7, 8], + [ 9, 10, 11]]) + Coordinates: + * x (x) float64 0.0 0.1 1.1 1.2 + Dimensions without coordinates: y + >>> + >>> da.differentiate('x') + + array([[30. , 30. , 30. ], + [27.545455, 27.545455, 27.545455], + [27.545455, 27.545455, 27.545455], + [30. , 30. , 30. ]]) + Coordinates: + * x (x) float64 0.0 0.1 1.1 1.2 + Dimensions without coordinates: y + """ + ds = self._to_temp_dataset().differentiate( + coord, edge_order, datetime_unit) + return self._from_temp_dataset(ds) + # priority most be higher than Variable to properly work with binary ufuncs ops.inject_all_ops_and_reduce_methods(DataArray, priority=60) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e98495e71fb..131345693df 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -13,8 +13,8 @@ import xarray as xr from . import ( - alignment, duck_array_ops, formatting, groupby, indexing, ops, resample, - rolling, utils) + alignment, computation, duck_array_ops, formatting, groupby, indexing, ops, + resample, rolling, utils) from .. import conventions from .alignment import align from .common import ( @@ -31,7 +31,7 @@ OrderedDict, basestring, dask_array_type, integer_types, iteritems, range) from .utils import ( Frozen, SortedKeysDict, either_dict_or_kwargs, decode_numpy_dict_values, - ensure_us_time_resolution, hashable, maybe_wrap_array) + ensure_us_time_resolution, hashable, maybe_wrap_array, to_numeric) from .variable import IndexVariable, Variable, as_variable, broadcast_variables # list of attributes of pd.DatetimeIndex that are ndarrays of time info @@ -3313,6 +3313,9 @@ def diff(self, dim, n=1, label='upper'): Data variables: foo (x) int64 1 -1 + See Also + -------- + Dataset.differentiate """ if n == 0: return self @@ -3663,6 +3666,62 @@ def rank(self, dim, pct=False, keep_attrs=False): attrs = self.attrs if keep_attrs else None return self._replace_vars_and_dims(variables, coord_names, attrs=attrs) + def differentiate(self, coord, edge_order=1, datetime_unit=None): + """ Differentiate with the second order accurate central + differences. + + .. note:: + This feature is limited to simple cartesian geometry, i.e. coord + must be one dimensional. + + Parameters + ---------- + coord: str + The coordinate to be used to compute the gradient. + edge_order: 1 or 2. Default 1 + N-th order accurate differences at the boundaries. + datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', + 'us', 'ns', 'ps', 'fs', 'as'} + Unit to compute gradient. Only valid for datetime coordinate. + + Returns + ------- + differentiated: Dataset + + See also + -------- + numpy.gradient: corresponding numpy function + """ + from .variable import Variable + + if coord not in self.variables and coord not in self.dims: + raise ValueError('Coordinate {} does not exist.'.format(coord)) + + coord_var = self[coord].variable + if coord_var.ndim != 1: + raise ValueError('Coordinate {} must be 1 dimensional but is {}' + ' dimensional'.format(coord, coord_var.ndim)) + + dim = coord_var.dims[0] + coord_data = coord_var.data + if coord_data.dtype.kind in 'mM': + if datetime_unit is None: + datetime_unit, _ = np.datetime_data(coord_data.dtype) + coord_data = to_numeric(coord_data, datetime_unit=datetime_unit) + + variables = OrderedDict() + for k, v in self.variables.items(): + if (k in self.data_vars and dim in v.dims and + k not in self.coords): + v = to_numeric(v, datetime_unit=datetime_unit) + grad = duck_array_ops.gradient( + v.data, coord_data, edge_order=edge_order, + axis=v.get_axis_num(dim)) + variables[k] = Variable(v.dims, grad) + else: + variables[k] = v + return self._replace_vars_and_dims(variables) + @property def real(self): return self._unary_op(lambda x: x.real, keep_attrs=True)(self) @@ -3676,7 +3735,7 @@ def filter_by_attrs(self, **kwargs): Can pass in ``key=value`` or ``key=callable``. A Dataset is returned containing only the variables for which all the filter tests pass. - These tests are either ``key=value`` for which the attribute ``key`` + These tests are either ``key=value`` for which the attribute ``key`` has the exact value ``value`` or the callable passed into ``key=callable`` returns True. The callable will be passed a single value, either the value of the attribute ``key`` or ``None`` if the diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 17eb310f8db..ef89dba2ab8 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -93,6 +93,14 @@ def isnull(data): einsum = _dask_or_eager_func('einsum', array_args=slice(1, None), requires_dask='0.17.3') + +def gradient(x, coord, axis, edge_order): + if isinstance(x, dask_array_type): + return dask_array_compat.gradient( + x, coord, axis=axis, edge_order=edge_order) + return npcompat.gradient(x, coord, axis=axis, edge_order=edge_order) + + masked_invalid = _dask_or_eager_func( 'masked_invalid', eager_module=np.ma, dask_module=getattr(dask_array, 'ma', None)) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 90aa4ffaeda..afb34d99115 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -11,7 +11,7 @@ from . import rolling from .computation import apply_ufunc from .pycompat import iteritems -from .utils import is_scalar, OrderedSet +from .utils import is_scalar, OrderedSet, to_numeric from .variable import Variable, broadcast_variables from .duck_array_ops import dask_array_type @@ -414,8 +414,8 @@ def _floatize_x(x, new_x): # offset (min(x)) and the variation (x - min(x)) can be # represented by float. xmin = np.min(x[i]) - x[i] = (x[i] - xmin).astype(np.float64) - new_x[i] = (new_x[i] - xmin).astype(np.float64) + x[i] = to_numeric(x[i], offset=xmin, dtype=np.float64) + new_x[i] = to_numeric(new_x[i], offset=xmin, dtype=np.float64) return x, new_x diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index 6d4db063b98..22dff44acf8 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -1,5 +1,6 @@ from __future__ import absolute_import, division, print_function +from distutils.version import LooseVersion import numpy as np try: @@ -97,3 +98,187 @@ def isin(element, test_elements, assume_unique=False, invert=False): element = np.asarray(element) return np.in1d(element, test_elements, assume_unique=assume_unique, invert=invert).reshape(element.shape) + + +if LooseVersion(np.__version__) >= LooseVersion('1.13'): + gradient = np.gradient +else: + def normalize_axis_tuple(axes, N): + if isinstance(axes, int): + axes = (axes, ) + return tuple([N + a if a < 0 else a for a in axes]) + + def gradient(f, *varargs, **kwargs): + f = np.asanyarray(f) + N = f.ndim # number of dimensions + + axes = kwargs.pop('axis', None) + if axes is None: + axes = tuple(range(N)) + else: + axes = normalize_axis_tuple(axes, N) + + len_axes = len(axes) + n = len(varargs) + if n == 0: + # no spacing argument - use 1 in all axes + dx = [1.0] * len_axes + elif n == 1 and np.ndim(varargs[0]) == 0: + # single scalar for all axes + dx = varargs * len_axes + elif n == len_axes: + # scalar or 1d array for each axis + dx = list(varargs) + for i, distances in enumerate(dx): + if np.ndim(distances) == 0: + continue + elif np.ndim(distances) != 1: + raise ValueError("distances must be either scalars or 1d") + if len(distances) != f.shape[axes[i]]: + raise ValueError("when 1d, distances must match the " + "length of the corresponding dimension") + diffx = np.diff(distances) + # if distances are constant reduce to the scalar case + # since it brings a consistent speedup + if (diffx == diffx[0]).all(): + diffx = diffx[0] + dx[i] = diffx + else: + raise TypeError("invalid number of arguments") + + edge_order = kwargs.pop('edge_order', 1) + if kwargs: + raise TypeError('"{}" are not valid keyword arguments.'.format( + '", "'.join(kwargs.keys()))) + if edge_order > 2: + raise ValueError("'edge_order' greater than 2 not supported") + + # use central differences on interior and one-sided differences on the + # endpoints. This preserves second order-accuracy over the full domain. + + outvals = [] + + # create slice objects --- initially all are [:, :, ..., :] + slice1 = [slice(None)] * N + slice2 = [slice(None)] * N + slice3 = [slice(None)] * N + slice4 = [slice(None)] * N + + otype = f.dtype.char + if otype not in ['f', 'd', 'F', 'D', 'm', 'M']: + otype = 'd' + + # Difference of datetime64 elements results in timedelta64 + if otype == 'M': + # Need to use the full dtype name because it contains unit + # information + otype = f.dtype.name.replace('datetime', 'timedelta') + elif otype == 'm': + # Needs to keep the specific units, can't be a general unit + otype = f.dtype + + # Convert datetime64 data into ints. Make dummy variable `y` + # that is a view of ints if the data is datetime64, otherwise + # just set y equal to the array `f`. + if f.dtype.char in ["M", "m"]: + y = f.view('int64') + else: + y = f + + for i, axis in enumerate(axes): + if y.shape[axis] < edge_order + 1: + raise ValueError( + "Shape of array too small to calculate a numerical " + "gradient, at least (edge_order + 1) elements are " + "required.") + # result allocation + out = np.empty_like(y, dtype=otype) + + uniform_spacing = np.ndim(dx[i]) == 0 + + # Numerical differentiation: 2nd order interior + slice1[axis] = slice(1, -1) + slice2[axis] = slice(None, -2) + slice3[axis] = slice(1, -1) + slice4[axis] = slice(2, None) + + if uniform_spacing: + out[slice1] = (f[slice4] - f[slice2]) / (2. * dx[i]) + else: + dx1 = dx[i][0:-1] + dx2 = dx[i][1:] + a = -(dx2) / (dx1 * (dx1 + dx2)) + b = (dx2 - dx1) / (dx1 * dx2) + c = dx1 / (dx2 * (dx1 + dx2)) + # fix the shape for broadcasting + shape = np.ones(N, dtype=int) + shape[axis] = -1 + a.shape = b.shape = c.shape = shape + # 1D equivalent -- + # out[1:-1] = a * f[:-2] + b * f[1:-1] + c * f[2:] + out[slice1] = a * f[slice2] + b * f[slice3] + c * f[slice4] + + # Numerical differentiation: 1st order edges + if edge_order == 1: + slice1[axis] = 0 + slice2[axis] = 1 + slice3[axis] = 0 + dx_0 = dx[i] if uniform_spacing else dx[i][0] + # 1D equivalent -- out[0] = (y[1] - y[0]) / (x[1] - x[0]) + out[slice1] = (y[slice2] - y[slice3]) / dx_0 + + slice1[axis] = -1 + slice2[axis] = -1 + slice3[axis] = -2 + dx_n = dx[i] if uniform_spacing else dx[i][-1] + # 1D equivalent -- out[-1] = (y[-1] - y[-2]) / (x[-1] - x[-2]) + out[slice1] = (y[slice2] - y[slice3]) / dx_n + + # Numerical differentiation: 2nd order edges + else: + slice1[axis] = 0 + slice2[axis] = 0 + slice3[axis] = 1 + slice4[axis] = 2 + if uniform_spacing: + a = -1.5 / dx[i] + b = 2. / dx[i] + c = -0.5 / dx[i] + else: + dx1 = dx[i][0] + dx2 = dx[i][1] + a = -(2. * dx1 + dx2) / (dx1 * (dx1 + dx2)) + b = (dx1 + dx2) / (dx1 * dx2) + c = - dx1 / (dx2 * (dx1 + dx2)) + # 1D equivalent -- out[0] = a * y[0] + b * y[1] + c * y[2] + out[slice1] = a * y[slice2] + b * y[slice3] + c * y[slice4] + + slice1[axis] = -1 + slice2[axis] = -3 + slice3[axis] = -2 + slice4[axis] = -1 + if uniform_spacing: + a = 0.5 / dx[i] + b = -2. / dx[i] + c = 1.5 / dx[i] + else: + dx1 = dx[i][-2] + dx2 = dx[i][-1] + a = (dx2) / (dx1 * (dx1 + dx2)) + b = - (dx2 + dx1) / (dx1 * dx2) + c = (2. * dx2 + dx1) / (dx2 * (dx1 + dx2)) + # 1D equivalent -- out[-1] = a * f[-3] + b * f[-2] + c * f[-1] + out[slice1] = a * y[slice2] + b * y[slice3] + c * y[slice4] + + outvals.append(out) + + # reset the slice object in this dimension to ":" + slice1[axis] = slice(None) + slice2[axis] = slice(None) + slice3[axis] = slice(None) + slice4[axis] = slice(None) + + if len_axes == 1: + return outvals[0] + else: + return outvals diff --git a/xarray/core/utils.py b/xarray/core/utils.py index c3bb747fac5..9d129d5c4f4 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -591,3 +591,24 @@ def __iter__(self): def __len__(self): num_hidden = sum([k in self._hidden_keys for k in self._data]) return len(self._data) - num_hidden + + +def to_numeric(array, offset=None, datetime_unit=None, dtype=float): + """ + Make datetime array float + + offset: Scalar with the same type of array or None + If None, subtract minimum values to reduce round off error + datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', + 'us', 'ns', 'ps', 'fs', 'as'} + dtype: target dtype + """ + if array.dtype.kind not in ['m', 'M']: + return array.astype(dtype) + if offset is None: + offset = np.min(array) + array = array - offset + + if datetime_unit: + return (array / np.timedelta64(1, datetime_unit)).astype(dtype) + return array.astype(dtype) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d22d8470dc6..5ea6b887b5e 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -15,7 +15,7 @@ from xarray import ( DataArray, Dataset, IndexVariable, MergeError, Variable, align, backends, broadcast, open_dataset, set_options) -from xarray.core import indexing, utils +from xarray.core import indexing, npcompat, utils from xarray.core.common import full_like from xarray.core.pycompat import ( OrderedDict, integer_types, iteritems, unicode_type) @@ -4492,3 +4492,78 @@ def test_raise_no_warning_for_nan_in_binary_ops(): with pytest.warns(None) as record: Dataset(data_vars={'x': ('y', [1, 2, np.NaN])}) > 0 assert len(record) == 0 + + +@pytest.mark.parametrize('dask', [True, False]) +@pytest.mark.parametrize('edge_order', [1, 2]) +def test_gradient(dask, edge_order): + rs = np.random.RandomState(42) + coord = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8] + + da = xr.DataArray(rs.randn(8, 6), dims=['x', 'y'], + coords={'x': coord, + 'z': 3, 'x2d': (('x', 'y'), rs.randn(8, 6))}) + if dask and has_dask: + da = da.chunk({'x': 4}) + + ds = xr.Dataset({'var': da}) + + # along x + actual = da.differentiate('x', edge_order) + expected_x = xr.DataArray( + npcompat.gradient(da, da['x'], axis=0, edge_order=edge_order), + dims=da.dims, coords=da.coords) + assert_equal(expected_x, actual) + assert_equal(ds['var'].differentiate('x', edge_order=edge_order), + ds.differentiate('x', edge_order=edge_order)['var']) + # coordinate should not change + assert_equal(da['x'], actual['x']) + + # along y + actual = da.differentiate('y', edge_order) + expected_y = xr.DataArray( + npcompat.gradient(da, da['y'], axis=1, edge_order=edge_order), + dims=da.dims, coords=da.coords) + assert_equal(expected_y, actual) + assert_equal(actual, ds.differentiate('y', edge_order=edge_order)['var']) + assert_equal(ds['var'].differentiate('y', edge_order=edge_order), + ds.differentiate('y', edge_order=edge_order)['var']) + + with pytest.raises(ValueError): + da.differentiate('x2d') + + +@pytest.mark.parametrize('dask', [True, False]) +def test_gradient_datetime(dask): + rs = np.random.RandomState(42) + coord = np.array( + ['2004-07-13', '2006-01-13', '2010-08-13', '2010-09-13', + '2010-10-11', '2010-12-13', '2011-02-13', '2012-08-13'], + dtype='datetime64') + + da = xr.DataArray(rs.randn(8, 6), dims=['x', 'y'], + coords={'x': coord, + 'z': 3, 'x2d': (('x', 'y'), rs.randn(8, 6))}) + if dask and has_dask: + da = da.chunk({'x': 4}) + + # along x + actual = da.differentiate('x', edge_order=1, datetime_unit='D') + expected_x = xr.DataArray( + npcompat.gradient( + da, utils.to_numeric(da['x'], datetime_unit='D'), + axis=0, edge_order=1), dims=da.dims, coords=da.coords) + assert_equal(expected_x, actual) + + actual2 = da.differentiate('x', edge_order=1, datetime_unit='h') + assert np.allclose(actual, actual2 * 24) + + # for datetime variable + actual = da['x'].differentiate('x', edge_order=1, datetime_unit='D') + assert np.allclose(actual, 1.0) + + # with different date unit + da = xr.DataArray(coord.astype('datetime64[ms]'), dims=['x'], + coords={'x': coord}) + actual = da.differentiate('x', edge_order=1) + assert np.allclose(actual, 1.0) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 3f32fc49fd2..b9712f60290 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -12,8 +12,8 @@ from xarray import DataArray, Dataset, concat from xarray.core import duck_array_ops, dtypes from xarray.core.duck_array_ops import ( - array_notnull_equiv, concatenate, count, first, last, mean, rolling_window, - stack, where) + array_notnull_equiv, concatenate, count, first, gradient, last, mean, + rolling_window, stack, where) from xarray.core.pycompat import dask_array_type from xarray.testing import assert_allclose, assert_equal @@ -417,6 +417,23 @@ def test_dask_rolling(axis, window, center): fill_value=np.nan) +@pytest.mark.skipif(not has_dask, reason='This is for dask.') +@pytest.mark.parametrize('axis', [0, -1, 1]) +@pytest.mark.parametrize('edge_order', [1, 2]) +def test_dask_gradient(axis, edge_order): + import dask.array as da + + array = np.array(np.random.randn(100, 5, 40)) + x = np.exp(np.linspace(0, 1, array.shape[axis])) + + darray = da.from_array(array, chunks=[(6, 30, 30, 20, 14), 5, 8]) + expected = gradient(array, x, axis=axis, edge_order=edge_order) + actual = gradient(darray, x, axis=axis, edge_order=edge_order) + + assert isinstance(actual, da.Array) + assert_array_equal(actual, expected) + + @pytest.mark.parametrize('dim_num', [1, 2]) @pytest.mark.parametrize('dtype', [float, int, np.float32, np.bool_]) @pytest.mark.parametrize('dask', [False, True])