Skip to content

implement Gradient #2398

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 27 commits into from
Sep 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ Computation
Dataset.resample
Dataset.diff
Dataset.quantile
Dataset.differentiate

**Aggregation**:
:py:attr:`~Dataset.all`
Expand Down Expand Up @@ -317,6 +318,7 @@ Computation
DataArray.diff
DataArray.dot
DataArray.quantile
DataArray.differentiate

**Aggregation**:
:py:attr:`~DataArray.all`
Expand Down
23 changes: 23 additions & 0 deletions doc/computation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,29 @@ You can also use ``construct`` to compute a weighted rolling sum:
To avoid this, use ``skipna=False`` as the above example.


Computation using Coordinates
=============================

Xarray objects have some handy methods for the computation with their
coordinates. :py:meth:`~xarray.DataArray.differentiate` computes derivatives by
central finite differences using their coordinates,

.. ipython:: python
a = xr.DataArray([0, 1, 2, 3], dims=['x'], coords=[0.1, 0.11, 0.2, 0.3])
a
a.differentiate('x')

This method can be used also for multidimensional arrays,

.. ipython:: python
a = xr.DataArray(np.arange(8).reshape(4, 2), dims=['x', 'y'],
coords=[0.1, 0.11, 0.2, 0.3])
a.differentiate('x')

.. note::
This method is limited to simple cartesian geometry. Differentiation along
multidimensional coordinate is not supported.

.. _compute.broadcasting:

Broadcasting by dimension name
Expand Down
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ Documentation
Enhancements
~~~~~~~~~~~~

- :py:meth:`~xarray.DataArray.differentiate` and
:py:meth:`~xarray.Dataset.differentiate` are newly added.
(:issue:`1332`)
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
- Default colormap for sequential and divergent data can now be set via
:py:func:`~xarray.set_options()`
(:issue:`2394`)
Expand Down
2 changes: 1 addition & 1 deletion xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .core.alignment import align, broadcast, broadcast_arrays
from .core.common import full_like, zeros_like, ones_like
from .core.combine import concat, auto_combine
from .core.computation import apply_ufunc, where, dot
from .core.computation import apply_ufunc, dot, where
from .core.extensions import (register_dataarray_accessor,
register_dataset_accessor)
from .core.variable import as_variable, Variable, IndexVariable, Coordinate
Expand Down
124 changes: 124 additions & 0 deletions xarray/core/dask_array_compat.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import absolute_import, division, print_function

from distutils.version import LooseVersion

import numpy as np
from dask import __version__ as dask_version
import dask.array as da

try:
Expand Down Expand Up @@ -30,3 +33,124 @@ def isin(element, test_elements, assume_unique=False, invert=False):
if invert:
result = ~result
return result


if LooseVersion(dask_version) > LooseVersion('1.19.2'):
gradient = da.gradient

else: # pragma: no cover
# Copied from dask v0.19.2
# Used under the terms of Dask's license, see licenses/DASK_LICENSE.
import math
from numbers import Integral, Real

AxisError = np.AxisError

def validate_axis(axis, ndim):
""" Validate an input to axis= keywords """
if isinstance(axis, (tuple, list)):
return tuple(validate_axis(ax, ndim) for ax in axis)
if not isinstance(axis, Integral):
raise TypeError("Axis value must be an integer, got %s" % axis)
if axis < -ndim or axis >= ndim:
raise AxisError("Axis %d is out of bounds for array of dimension "
"%d" % (axis, ndim))
if axis < 0:
axis += ndim
return axis

def _gradient_kernel(x, block_id, coord, axis, array_locs, grad_kwargs):
"""
x: nd-array
array of one block
coord: 1d-array or scalar
coordinate along which the gradient is computed.
axis: int
axis along which the gradient is computed
array_locs:
actual location along axis. None if coordinate is scalar
grad_kwargs:
keyword to be passed to np.gradient
"""
block_loc = block_id[axis]
if array_locs is not None:
coord = coord[array_locs[0][block_loc]:array_locs[1][block_loc]]
grad = np.gradient(x, coord, axis=axis, **grad_kwargs)
return grad

def gradient(f, *varargs, **kwargs):
f = da.asarray(f)

kwargs["edge_order"] = math.ceil(kwargs.get("edge_order", 1))
if kwargs["edge_order"] > 2:
raise ValueError("edge_order must be less than or equal to 2.")

drop_result_list = False
axis = kwargs.pop("axis", None)
if axis is None:
axis = tuple(range(f.ndim))
elif isinstance(axis, Integral):
drop_result_list = True
axis = (axis,)

axis = validate_axis(axis, f.ndim)

if len(axis) != len(set(axis)):
raise ValueError("duplicate axes not allowed")

axis = tuple(ax % f.ndim for ax in axis)

if varargs == ():
varargs = (1,)
if len(varargs) == 1:
varargs = len(axis) * varargs
if len(varargs) != len(axis):
raise TypeError(
"Spacing must either be a single scalar, or a scalar / "
"1d-array per axis"
)

if issubclass(f.dtype.type, (np.bool8, Integral)):
f = f.astype(float)
elif issubclass(f.dtype.type, Real) and f.dtype.itemsize < 4:
f = f.astype(float)

results = []
for i, ax in enumerate(axis):
for c in f.chunks[ax]:
if np.min(c) < kwargs["edge_order"] + 1:
raise ValueError(
'Chunk size must be larger than edge_order + 1. '
'Minimum chunk for aixs {} is {}. Rechunk to '
'proceed.'.format(np.min(c), ax))

if np.isscalar(varargs[i]):
array_locs = None
else:
if isinstance(varargs[i], da.Array):
raise NotImplementedError(
'dask array coordinated is not supported.')
# coordinate position for each block taking overlap into
# account
chunk = np.array(f.chunks[ax])
array_loc_stop = np.cumsum(chunk) + 1
array_loc_start = array_loc_stop - chunk - 2
array_loc_stop[-1] -= 1
array_loc_start[0] = 0
array_locs = (array_loc_start, array_loc_stop)

results.append(f.map_overlap(
_gradient_kernel,
dtype=f.dtype,
depth={j: 1 if j == ax else 0 for j in range(f.ndim)},
boundary="none",
coord=varargs[i],
axis=ax,
array_locs=array_locs,
grad_kwargs=kwargs,
))

if drop_result_list:
results = results[0]

return results
58 changes: 58 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2010,6 +2010,9 @@ def diff(self, dim, n=1, label='upper'):
Coordinates:
* x (x) int64 3 4

See Also
--------
DataArray.differentiate
"""
ds = self._to_temp_dataset().diff(n=n, dim=dim, label=label)
return self._from_temp_dataset(ds)
Expand Down Expand Up @@ -2289,6 +2292,61 @@ def rank(self, dim, pct=False, keep_attrs=False):
ds = self._to_temp_dataset().rank(dim, pct=pct, keep_attrs=keep_attrs)
return self._from_temp_dataset(ds)

def differentiate(self, coord, edge_order=1, datetime_unit=None):
""" Differentiate the array with the second order accurate central
differences.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and here


.. note::
This feature is limited to simple cartesian geometry, i.e. coord
must be one dimensional.

Parameters
----------
coord: str
The coordinate to be used to compute the gradient.
edge_order: 1 or 2. Default 1
N-th order accurate differences at the boundaries.
datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
'us', 'ns', 'ps', 'fs', 'as'}
Unit to compute gradient. Only valid for datetime coordinate.

Returns
-------
differentiated: DataArray

See also
--------
numpy.gradient: corresponding numpy function

Examples
--------

>>> da = xr.DataArray(np.arange(12).reshape(4, 3), dims=['x', 'y'],
... coords={'x': [0, 0.1, 1.1, 1.2]})
>>> da
<xarray.DataArray (x: 4, y: 3)>
array([[ 0, 1, 2],
[ 3, 4, 5],
[ 6, 7, 8],
[ 9, 10, 11]])
Coordinates:
* x (x) float64 0.0 0.1 1.1 1.2
Dimensions without coordinates: y
>>>
>>> da.differentiate('x')
<xarray.DataArray (x: 4, y: 3)>
array([[30. , 30. , 30. ],
[27.545455, 27.545455, 27.545455],
[27.545455, 27.545455, 27.545455],
[30. , 30. , 30. ]])
Coordinates:
* x (x) float64 0.0 0.1 1.1 1.2
Dimensions without coordinates: y
"""
ds = self._to_temp_dataset().differentiate(
coord, edge_order, datetime_unit)
return self._from_temp_dataset(ds)


# priority most be higher than Variable to properly work with binary ufuncs
ops.inject_all_ops_and_reduce_methods(DataArray, priority=60)
67 changes: 63 additions & 4 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
import xarray as xr

from . import (
alignment, duck_array_ops, formatting, groupby, indexing, ops, resample,
rolling, utils)
alignment, computation, duck_array_ops, formatting, groupby, indexing, ops,
resample, rolling, utils)
from .. import conventions
from .alignment import align
from .common import (
Expand All @@ -31,7 +31,7 @@
OrderedDict, basestring, dask_array_type, integer_types, iteritems, range)
from .utils import (
Frozen, SortedKeysDict, either_dict_or_kwargs, decode_numpy_dict_values,
ensure_us_time_resolution, hashable, maybe_wrap_array)
ensure_us_time_resolution, hashable, maybe_wrap_array, to_numeric)
from .variable import IndexVariable, Variable, as_variable, broadcast_variables

# list of attributes of pd.DatetimeIndex that are ndarrays of time info
Expand Down Expand Up @@ -3313,6 +3313,9 @@ def diff(self, dim, n=1, label='upper'):
Data variables:
foo (x) int64 1 -1

See Also
--------
Dataset.differentiate
"""
if n == 0:
return self
Expand Down Expand Up @@ -3663,6 +3666,62 @@ def rank(self, dim, pct=False, keep_attrs=False):
attrs = self.attrs if keep_attrs else None
return self._replace_vars_and_dims(variables, coord_names, attrs=attrs)

def differentiate(self, coord, edge_order=1, datetime_unit=None):
""" Differentiate with the second order accurate central
differences.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and here


.. note::
This feature is limited to simple cartesian geometry, i.e. coord
must be one dimensional.

Parameters
----------
coord: str
The coordinate to be used to compute the gradient.
edge_order: 1 or 2. Default 1
N-th order accurate differences at the boundaries.
datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
'us', 'ns', 'ps', 'fs', 'as'}
Unit to compute gradient. Only valid for datetime coordinate.

Returns
-------
differentiated: Dataset

See also
--------
numpy.gradient: corresponding numpy function
"""
from .variable import Variable

if coord not in self.variables and coord not in self.dims:
raise ValueError('Coordinate {} does not exist.'.format(coord))

coord_var = self[coord].variable
if coord_var.ndim != 1:
raise ValueError('Coordinate {} must be 1 dimensional but is {}'
' dimensional'.format(coord, coord_var.ndim))

dim = coord_var.dims[0]
coord_data = coord_var.data
if coord_data.dtype.kind in 'mM':
if datetime_unit is None:
datetime_unit, _ = np.datetime_data(coord_data.dtype)
coord_data = to_numeric(coord_data, datetime_unit=datetime_unit)

variables = OrderedDict()
for k, v in self.variables.items():
if (k in self.data_vars and dim in v.dims and
k not in self.coords):
v = to_numeric(v, datetime_unit=datetime_unit)
grad = duck_array_ops.gradient(
v.data, coord_data, edge_order=edge_order,
axis=v.get_axis_num(dim))
variables[k] = Variable(v.dims, grad)
else:
variables[k] = v
return self._replace_vars_and_dims(variables)

@property
def real(self):
return self._unary_op(lambda x: x.real, keep_attrs=True)(self)
Expand All @@ -3676,7 +3735,7 @@ def filter_by_attrs(self, **kwargs):

Can pass in ``key=value`` or ``key=callable``. A Dataset is returned
containing only the variables for which all the filter tests pass.
These tests are either ``key=value`` for which the attribute ``key``
These tests are either ``key=value`` for which the attribute ``key``
has the exact value ``value`` or the callable passed into
``key=callable`` returns True. The callable will be passed a single
value, either the value of the attribute ``key`` or ``None`` if the
Expand Down
8 changes: 8 additions & 0 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,14 @@ def isnull(data):
einsum = _dask_or_eager_func('einsum', array_args=slice(1, None),
requires_dask='0.17.3')


def gradient(x, coord, axis, edge_order):
if isinstance(x, dask_array_type):
return dask_array_compat.gradient(
x, coord, axis=axis, edge_order=edge_order)
return npcompat.gradient(x, coord, axis=axis, edge_order=edge_order)


masked_invalid = _dask_or_eager_func(
'masked_invalid', eager_module=np.ma,
dask_module=getattr(dask_array, 'ma', None))
Expand Down
Loading