Skip to content

Commit f554d0a

Browse files
Add DataArrayCoarsen.reduce and DatasetCoarsen.reduce methods (#4939)
1 parent 200c2b2 commit f554d0a

File tree

5 files changed

+103
-2
lines changed

5 files changed

+103
-2
lines changed

doc/api-hidden.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
core.rolling.DatasetCoarsen.median
4848
core.rolling.DatasetCoarsen.min
4949
core.rolling.DatasetCoarsen.prod
50+
core.rolling.DatasetCoarsen.reduce
5051
core.rolling.DatasetCoarsen.std
5152
core.rolling.DatasetCoarsen.sum
5253
core.rolling.DatasetCoarsen.var
@@ -190,6 +191,7 @@
190191
core.rolling.DataArrayCoarsen.median
191192
core.rolling.DataArrayCoarsen.min
192193
core.rolling.DataArrayCoarsen.prod
194+
core.rolling.DataArrayCoarsen.reduce
193195
core.rolling.DataArrayCoarsen.std
194196
core.rolling.DataArrayCoarsen.sum
195197
core.rolling.DataArrayCoarsen.var

doc/whats-new.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ New Features
100100
(including globs for the latter) for ``engine="zarr"``, and so allow reading from
101101
many remote and other file systems (:pull:`4461`)
102102
By `Martin Durant <https://github.com/martindurant>`_
103+
- :py:class:`DataArrayCoarsen` and :py:class:`DatasetCoarsen` now implement a
104+
``reduce`` method, enabling coarsening operations with custom reduction
105+
functions (:issue:`3741`, :pull:`4939`). By `Spencer Clark
106+
<https://github.com/spencerkclark>`_.
103107

104108
Bug fixes
105109
~~~~~~~~~

xarray/core/rolling.py

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -836,7 +836,9 @@ class DataArrayCoarsen(Coarsen):
836836
_reduce_extra_args_docstring = """"""
837837

838838
@classmethod
839-
def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
839+
def _reduce_method(
840+
cls, func: Callable, include_skipna: bool = False, numeric_only: bool = False
841+
):
840842
"""
841843
Return a wrapped function for injecting reduction methods.
842844
see ops.inject_reduce_methods
@@ -871,14 +873,48 @@ def wrapped_func(self, **kwargs):
871873

872874
return wrapped_func
873875

876+
def reduce(self, func: Callable, **kwargs):
877+
"""Reduce the items in this group by applying `func` along some
878+
dimension(s).
879+
880+
Parameters
881+
----------
882+
func : callable
883+
Function which can be called in the form `func(x, axis, **kwargs)`
884+
to return the result of collapsing an np.ndarray over the coarsening
885+
dimensions. It must be possible to provide the `axis` argument
886+
with a tuple of integers.
887+
**kwargs : dict
888+
Additional keyword arguments passed on to `func`.
889+
890+
Returns
891+
-------
892+
reduced : DataArray
893+
Array with summarized data.
894+
895+
Examples
896+
--------
897+
>>> da = xr.DataArray(np.arange(8).reshape(2, 4), dims=("a", "b"))
898+
>>> coarsen = da.coarsen(b=2)
899+
>>> coarsen.reduce(np.sum)
900+
<xarray.DataArray (a: 2, b: 2)>
901+
array([[ 1, 5],
902+
[ 9, 13]])
903+
Dimensions without coordinates: a, b
904+
"""
905+
wrapped_func = self._reduce_method(func)
906+
return wrapped_func(self, **kwargs)
907+
874908

875909
class DatasetCoarsen(Coarsen):
876910
__slots__ = ()
877911

878912
_reduce_extra_args_docstring = """"""
879913

880914
@classmethod
881-
def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool):
915+
def _reduce_method(
916+
cls, func: Callable, include_skipna: bool = False, numeric_only: bool = False
917+
):
882918
"""
883919
Return a wrapped function for injecting reduction methods.
884920
see ops.inject_reduce_methods
@@ -917,6 +953,28 @@ def wrapped_func(self, **kwargs):
917953

918954
return wrapped_func
919955

956+
def reduce(self, func: Callable, **kwargs):
957+
"""Reduce the items in this group by applying `func` along some
958+
dimension(s).
959+
960+
Parameters
961+
----------
962+
func : callable
963+
Function which can be called in the form `func(x, axis, **kwargs)`
964+
to return the result of collapsing an np.ndarray over the coarsening
965+
dimensions. It must be possible to provide the `axis` argument with
966+
a tuple of integers.
967+
**kwargs : dict
968+
Additional keyword arguments passed on to `func`.
969+
970+
Returns
971+
-------
972+
reduced : Dataset
973+
Arrays with summarized data.
974+
"""
975+
wrapped_func = self._reduce_method(func)
976+
return wrapped_func(self, **kwargs)
977+
920978

921979
inject_reduce_methods(DataArrayCoarsen)
922980
inject_reduce_methods(DatasetCoarsen)

xarray/tests/test_dataarray.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6382,6 +6382,22 @@ def test_coarsen_keep_attrs():
63826382
xr.testing.assert_identical(da, da2)
63836383

63846384

6385+
@pytest.mark.parametrize("da", (1, 2), indirect=True)
6386+
@pytest.mark.parametrize("window", (1, 2, 3, 4))
6387+
@pytest.mark.parametrize("name", ("sum", "mean", "std", "max"))
6388+
def test_coarsen_reduce(da, window, name):
6389+
if da.isnull().sum() > 1 and window == 1:
6390+
pytest.skip("These parameters lead to all-NaN slices")
6391+
6392+
# Use boundary="trim" to accommodate all window sizes used in tests
6393+
coarsen_obj = da.coarsen(time=window, boundary="trim")
6394+
6395+
# add nan prefix to numpy methods to get similar behavior as bottleneck
6396+
actual = coarsen_obj.reduce(getattr(np, f"nan{name}"))
6397+
expected = getattr(coarsen_obj, name)()
6398+
assert_allclose(actual, expected)
6399+
6400+
63856401
@pytest.mark.parametrize("da", (1, 2), indirect=True)
63866402
def test_rolling_iter(da):
63876403
rolling_obj = da.rolling(time=7)

xarray/tests/test_dataset.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6055,6 +6055,27 @@ def test_coarsen_keep_attrs():
60556055
xr.testing.assert_identical(ds, ds2)
60566056

60576057

6058+
@pytest.mark.slow
6059+
@pytest.mark.parametrize("ds", (1, 2), indirect=True)
6060+
@pytest.mark.parametrize("window", (1, 2, 3, 4))
6061+
@pytest.mark.parametrize("name", ("sum", "mean", "std", "var", "min", "max", "median"))
6062+
def test_coarsen_reduce(ds, window, name):
6063+
# Use boundary="trim" to accommodate all window sizes used in tests
6064+
coarsen_obj = ds.coarsen(time=window, boundary="trim")
6065+
6066+
# add nan prefix to numpy methods to get similar behavior as bottleneck
6067+
actual = coarsen_obj.reduce(getattr(np, f"nan{name}"))
6068+
expected = getattr(coarsen_obj, name)()
6069+
assert_allclose(actual, expected)
6070+
6071+
# make sure the order of data_vars is not changed.
6072+
assert list(ds.data_vars.keys()) == list(actual.data_vars.keys())
6073+
6074+
# Make sure the dimension order is restored
6075+
for key, src_var in ds.data_vars.items():
6076+
assert src_var.dims == actual[key].dims
6077+
6078+
60586079
@pytest.mark.parametrize(
60596080
"funcname, argument",
60606081
[

0 commit comments

Comments
 (0)