Skip to content

Add keep_attrs to reduction methods #141

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 22, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions test/test_data_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd
from copy import deepcopy
from textwrap import dedent
from collections import OrderedDict

from xray import Dataset, DataArray, Variable, align
from xray.pycompat import iteritems
Expand All @@ -10,8 +11,10 @@

class TestDataArray(TestCase):
def setUp(self):
    # Attributes attached to ``self.va`` only; ``self.v`` is built without
    # any, so tests can compare reductions with and without attrs.
    self._attrs = {'attr1': 'value1', 'attr2': 2929}

    # One random (10, 20) array backs both variables below.
    data = np.random.random((10, 20))
    self.x = data
    self.v = Variable(['x', 'y'], data)
    self.va = Variable(['x', 'y'], data, self._attrs)

    # Wrap the attribute-free variable in a Dataset and pull it back out by
    # name (presumably yielding the DataArray under test -- confirm).
    dataset = Dataset({'foo': self.v})
    self.ds = dataset
    self.dv = dataset['foo']

Expand Down Expand Up @@ -262,6 +265,17 @@ def test_reduce(self):
# needs more...
# should check which extra dimensions are dropped

def test_reduce_keep_attrs(self):
    # ``self.va`` is the variable that setUp built with ``self._attrs``.

    # Default reduction: the result must come back with empty attrs.
    without_attrs = self.va.mean()
    self.assertEqual(len(without_attrs.attrs), 0)
    self.assertEqual(without_attrs.attrs, OrderedDict())

    # keep_attrs=True: the result must carry the original attributes.
    with_attrs = self.va.mean(keep_attrs=True)
    expected = self._attrs
    self.assertEqual(len(with_attrs.attrs), len(expected))
    self.assertEqual(with_attrs.attrs, expected)

def test_unselect(self):
with self.assertRaisesRegexp(ValueError, 'cannot unselect the name'):
self.dv.unselect('foo')
Expand Down
17 changes: 17 additions & 0 deletions test/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,3 +698,20 @@ def test_reduce_non_numeric(self):
self.assertDatasetEqual(data1.mean(), data2.mean())
self.assertDatasetEqual(data1.mean(dimension='dim1'),
data2.mean(dimension='dim1'))

def test_reduce_keep_attrs(self):
    # Dataset-level attrs must be dropped by a reduction unless the caller
    # passes keep_attrs=True.
    data = create_test_data()
    _attrs = {'attr1': 'value1', 'attr2': 2929}

    attrs = OrderedDict(_attrs)
    data.attrs = attrs

    # Test dropped attrs
    ds = data.mean()
    self.assertEqual(len(ds.attrs), 0)
    self.assertEqual(ds.attrs, OrderedDict())

    # Test kept attrs
    ds = data.mean(keep_attrs=True)
    self.assertEqual(len(ds.attrs), len(_attrs))
    # assertEqual, not assertTrue: assertTrue(a, b) treats b as the failure
    # message and only checks that a is truthy, so the kept attributes were
    # never actually compared against the expected ones.
    self.assertEqual(ds.attrs, attrs)
15 changes: 15 additions & 0 deletions test/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,21 @@ def test_reduce(self):
with self.assertRaisesRegexp(ValueError, 'cannot supply both'):
v.mean(dimension='x', axis=0)

def test_reduce_keep_attrs(self):
    # Build a 2-d variable over the shared test data with known attributes.
    expected = {'units': 'test', 'long_name': 'testing'}
    variable = Variable(['x', 'y'], self.d, expected)

    # Default reduction: the result must come back with empty attrs.
    without_attrs = variable.mean()
    self.assertEqual(len(without_attrs.attrs), 0)
    self.assertEqual(without_attrs.attrs, OrderedDict())

    # keep_attrs=True: the result must carry the original attributes.
    with_attrs = variable.mean(keep_attrs=True)
    self.assertEqual(len(with_attrs.attrs), len(expected))
    self.assertEqual(with_attrs.attrs, expected)


class TestCoordinate(TestCase, VariableSubclassTestCases):
cls = staticmethod(Coordinate)
Expand Down
8 changes: 6 additions & 2 deletions xray/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
class ImplementsReduce(object):
@classmethod
def _reduce_method(cls, f, name=None, module=None):
def func(self, dimension=None, axis=None, **kwargs):
return self.reduce(f, dimension, axis, **kwargs)
def func(self, dimension=None, axis=None, keep_attrs=False, **kwargs):
return self.reduce(f, dimension, axis, keep_attrs, **kwargs)
if name is None:
name = f.__name__
func.__name__ = name
Expand Down Expand Up @@ -96,6 +96,10 @@ def _get_axis_num(self, dim):
and 'axis' arguments can be supplied. If neither are supplied, then
`{name}` is calculated over the flattened array (by calling
`{name}(x)` without an axis argument).
keep_attrs : bool, optional
If True, the variable's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
**kwargs : dict
Additional keyword arguments passed on to `{name}`.

Expand Down
13 changes: 11 additions & 2 deletions xray/data_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,8 @@ def squeeze(self, dimension=None):
ds = self.dataset.squeeze(dimension)
return ds[self.name]

def reduce(self, func, dimension=None, axis=None, **kwargs):
def reduce(self, func, dimension=None, axis=None, keep_attrs=False,
**kwargs):
"""Reduce this array by applying `func` along some dimension(s).

Parameters
Expand All @@ -481,6 +482,10 @@ def reduce(self, func, dimension=None, axis=None, **kwargs):
'dimension' and 'axis' arguments can be supplied. If neither are
supplied, then the reduction is calculated over the flattened array
(by calling `f(x)` without an axis argument).
keep_attrs : bool, optional
If True, the variable's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
**kwargs : dict
Additional keyword arguments passed on to `func`.

Expand All @@ -490,7 +495,7 @@ def reduce(self, func, dimension=None, axis=None, **kwargs):
DataArray with this object's array replaced with an array with
summarized data and the indicated dimension(s) removed.
"""
var = self.variable.reduce(func, dimension, axis, **kwargs)
var = self.variable.reduce(func, dimension, axis, keep_attrs, **kwargs)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DataArrays use a dataset internally to keep track of their state. I suppose you probably want to keep attributes for the dataset, too? If so, you should add something like this to the end of this function:

if keep_attrs:
    ds.attrs = self.dataset.attrs

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Easy enough although I'm not sure if these dataset.attrs are ever populated.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They would be populated if, for example, your data array is from a dataset which has attributes.

drop = set(self.dimensions) - set(var.dimensions)
# For now, take an aggressive strategy of removing all variables
# associated with any dropped dimensions
Expand All @@ -499,6 +504,10 @@ def reduce(self, func, dimension=None, axis=None, **kwargs):
if any(dim in drop for dim in v.dimensions)}
ds = self.dataset.unselect(*drop)
ds[self.name] = var

if keep_attrs:
ds.attrs = self.dataset.attrs

return ds[self.name]

@classmethod
Expand Down
19 changes: 15 additions & 4 deletions xray/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -983,6 +983,10 @@ def squeeze(self, dimension=None):
dimension : str or sequence of str, optional
Dimension(s) over which to apply `func`. By default `func` is
applied over all dimensions.
keep_attrs : bool, optional
If True, the dataset's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
**kwargs : dict
Additional keyword arguments passed on to `{name}`.

Expand All @@ -995,8 +999,8 @@ def squeeze(self, dimension=None):

@classmethod
def _reduce_method(cls, f, name=None, module=None):
def func(self, dimension=None, **kwargs):
return self.reduce(f, dimension, **kwargs)
def func(self, dimension=None, keep_attrs=False, **kwargs):
return self.reduce(f, dimension, keep_attrs, **kwargs)
if name is None:
name = f.__name__
func.__name__ = name
Expand All @@ -1005,7 +1009,7 @@ def func(self, dimension=None, **kwargs):
cls=cls.__name__)
return func

def reduce(self, func, dimension=None, **kwargs):
def reduce(self, func, dimension=None, keep_attrs=False, **kwargs):
"""Reduce this dataset by applying `func` along some dimension(s).

Parameters
Expand All @@ -1017,6 +1021,10 @@ def reduce(self, func, dimension=None, **kwargs):
dimension : str or sequence of str, optional
Dimension(s) over which to apply `func`. By default `func` is
applied over all dimensions.
keep_attrs : bool, optional
If True, the dataset's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
**kwargs : dict
Additional keyword arguments passed on to `func`.

Expand Down Expand Up @@ -1052,7 +1060,10 @@ def reduce(self, func, dimension=None, **kwargs):
pass
else:
variables[name] = var
return Dataset(variables=variables)

attrs = self.attrs if keep_attrs else {}

return Dataset(variables=variables, attributes=attrs)

@classmethod
def concat(cls, datasets, dimension='concat_dimension', indexers=None,
Expand Down
12 changes: 10 additions & 2 deletions xray/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,8 @@ def squeeze(self, dimension=None):
dimensions = dict(zip(self.dimensions, self.shape))
return utils.squeeze(self, dimensions, dimension)

def reduce(self, func, dimension=None, axis=None, **kwargs):
def reduce(self, func, dimension=None, axis=None, keep_attrs=False,
**kwargs):
"""Reduce this array by applying `func` along some dimension(s).

Parameters
Expand All @@ -473,6 +474,10 @@ def reduce(self, func, dimension=None, axis=None, **kwargs):
and 'axis' arguments can be supplied. If neither are supplied, then
the reduction is calculated over the flattened array (by calling
`func(x)` without an axis argument).
keep_attrs : bool, optional
If True, the variable's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
**kwargs : dict
Additional keyword arguments passed on to `func`.

Expand All @@ -482,6 +487,7 @@ def reduce(self, func, dimension=None, axis=None, **kwargs):
Array with summarized data and the indicated dimension(s)
removed.
"""

if dimension is not None and axis is not None:
raise ValueError("cannot supply both 'axis' and 'dimension' "
"arguments")
Expand All @@ -495,7 +501,9 @@ def reduce(self, func, dimension=None, axis=None, **kwargs):
dims = [dim for n, dim in enumerate(self.dimensions)
if n not in removed_axes]

return Variable(dims, data)
attrs = self.attrs if keep_attrs else {}

return Variable(dims, data, attributes=attrs)

@classmethod
def concat(cls, variables, dimension='stacked_dimension',
Expand Down