diff --git a/test/test_data_array.py b/test/test_data_array.py index 439ef208579..3ad1c4e71a2 100644 --- a/test/test_data_array.py +++ b/test/test_data_array.py @@ -170,6 +170,18 @@ def test_constructor_from_self_described(self): actual = DataArray(pd.Index(['a', 'b'], name='foo')) self.assertDataArrayIdentical(expected, actual) + def test_constructor_from_coordinate(self): + values = 10 * np.arange(5) + coord = Coordinate('x', values) + expected = DataArray(values, [values], ['x'], name='x') + actual = DataArray(coord) + self.assertDataArrayIdentical(expected, actual) + + def test_constructor_from_0d(self): + expected = Dataset({None: ([], 0)})[None] + actual = DataArray(0) + self.assertDataArrayIdentical(expected, actual) + def test_equals_and_identical(self): da2 = self.dv.copy() self.assertTrue(self.dv.equals(da2)) @@ -324,9 +336,10 @@ def test_is_null(self): self.assertDataArrayIdentical(~expected, original.notnull()) def test_math(self): - x = self.x - v = self.v - a = self.dv + a = DataArray([np.arange(3), -np.arange(3)], + [[0, 1], ['a', 'b', 'c']], ['x', 'y']) + x = a.values + v = a.variable # variable math was already tested extensively, so let's just make sure # that all types are properly converted here self.assertDataArrayEqual(a, +a) @@ -339,12 +352,14 @@ def test_math(self): self.assertDataArrayEqual(a, a + 0 * a) self.assertDataArrayEqual(a, 0 * a + a) # test different indices - ds2 = self.ds.update({'x': ('x', 3 + np.arange(10))}, inplace=False) - b = ds2['foo'] - with self.assertRaisesRegexp(ValueError, 'not aligned'): - a + b - with self.assertRaisesRegexp(ValueError, 'not aligned'): - b + a + b = a.dataset.update({'x': ('x', [1, 2])}, inplace=False)[None] + self.assertDataArrayEqual(a[1:], a + 0 * b) + self.assertDataArrayEqual(a[1:], 0 * b + a) + expected = DataArray([[np.nan, np.nan, np.nan], -np.arange(3)], + [[0, 1], ['a', 'b', 'c']], ['x', 'y']) + a += 0 * b + self.assertDataArrayIdentical(a, expected) + with 
self.assertRaisesRegexp(TypeError, 'datasets do not support'): a + a.dataset @@ -548,8 +563,6 @@ def test_concat(self): def test_align(self): self.ds['x'] = ('x', np.array(list('abcdefghij'))) - with self.assertRaises(ValueError): - self.dv + self.dv[:5] dv1, dv2 = align(self.dv, self.dv[:5], join='inner') self.assertDataArrayIdentical(dv1, self.dv[:5]) self.assertDataArrayIdentical(dv2, self.dv[:5]) diff --git a/xray/common.py b/xray/common.py index b94aa7601b7..d457535a1db 100644 --- a/xray/common.py +++ b/xray/common.py @@ -176,7 +176,7 @@ def pretty_print(x, numchars): def dataset_repr(ds): summary = ['' % type(ds).__name__] - max_name_length = max(len(k) for k in ds.variables) if ds else 0 + max_name_length = max(len(str(k)) for k in ds.variables) if ds else 0 first_col_width = max(4 + max_name_length, 16) coords_str = pretty_print('Dimensions:', first_col_width) all_dim_strings = ['%s: %s' % (k, v) for k, v in iteritems(ds.dimensions)] @@ -196,7 +196,7 @@ def summarize_var(k, not_found=' ', found=int): else: indicator = not_found dim_strs.append(pretty_print(prepend + indicator, length)) - string = pretty_print(' ' + k, first_col_width) + ' ' + string = pretty_print(' %s' % k, first_col_width) + ' ' string += ' '.join(dim_strs) return string diff --git a/xray/data_array.py b/xray/data_array.py index dcca07bec93..44f51f06e58 100644 --- a/xray/data_array.py +++ b/xray/data_array.py @@ -135,7 +135,7 @@ class DataArray(AbstractArray): Dictionary of Coordinate objects that label values along each dimension. """ def __init__(self, data=None, coordinates=None, dimensions=None, name=None, - attributes=None, encoding=None, dataset=None): + attributes=None, encoding=None, dataset=None, fastpath=False): """ Parameters ---------- @@ -171,27 +171,29 @@ def __init__(self, data=None, coordinates=None, dimensions=None, name=None, new data array is created from an existing array in this dataset. 
""" if dataset is None: - # try to fill in arguments from data if they weren't supplied - if coordinates is None: - coordinates = getattr(data, 'coordinates', None) - if isinstance(data, pd.Series): - coordinates = [data.index] - elif isinstance(data, pd.DataFrame): - coordinates = [data.index, data.columns] - elif isinstance(data, pd.Panel): - coordinates = [data.items, data.major_axis, data.minor_axis] - if dimensions is None: - dimensions = getattr(data, 'dimensions', None) - if name is None: - name = getattr(data, 'name', None) - if attributes is None: - attributes = getattr(data, 'attrs', None) - if encoding is None: - encoding = getattr(data, 'encoding', None) - - data = variable._as_compatible_data(data) - coordinates, dimensions = _infer_coordinates_and_dimensions( - data.shape, coordinates, dimensions) + if not fastpath: + # try to fill in arguments from data if they were not supplied + if coordinates is None: + coordinates = getattr(data, 'coordinates', None) + if isinstance(data, pd.Series): + coordinates = [data.index] + elif isinstance(data, pd.DataFrame): + coordinates = [data.index, data.columns] + elif isinstance(data, pd.Panel): + coordinates = [data.items, data.major_axis, data.minor_axis] + if dimensions is None: + dimensions = getattr(data, 'dimensions', None) + if name is None: + name = getattr(data, 'name', None) + if attributes is None: + attributes = getattr(data, 'attrs', None) + if encoding is None: + encoding = getattr(data, 'encoding', None) + + data = variable._as_compatible_data(data) + coordinates, dimensions = _infer_coordinates_and_dimensions( + data.shape, coordinates, dimensions) + variables = OrderedDict((var.name, var) for var in coordinates) variables[name] = variable.Variable( dimensions, data, attributes, encoding) @@ -777,12 +779,12 @@ def identical(self, other): except AttributeError: return False - def _select_coords(self): - return xray.Dataset(self.coordinates) + # def _select_coords(self): + # return 
xray.Dataset(self.coordinates) def __array_wrap__(self, obj, context=None): new_var = self.variable.__array_wrap__(obj, context) - ds = self._select_coords() + ds = xray.Dataset(self.coordinates) if (self.name,) == self.dimensions: # use a new name for coordinate variables name = None @@ -798,13 +800,13 @@ def func(self, *args, **kwargs): return self.__array_wrap__(f(self.values, *args, **kwargs)) return func - def _check_coords_compat(self, other): - # TODO: possibly automatically select index intersection instead? - if hasattr(other, 'coordinates'): - for k, v in iteritems(self.coordinates): - if (k in other.coordinates - and not v.equals(other.coordinates[k])): - raise ValueError('coordinate %r is not aligned' % k) + # def _check_coords_compat(self, other): + # # TODO: possibly automatically select index intersection instead? + # if hasattr(other, 'coordinates'): + # for k, v in iteritems(self.coordinates): + # if (k in other.coordinates + # and not v.equals(other.coordinates[k])): + # raise ValueError('coordinate %r is not aligned' % k) @staticmethod def _binary_op(f, reflexive=False): @@ -812,28 +814,41 @@ def _binary_op(f, reflexive=False): def func(self, other): # TODO: automatically group by other variable dimensions to allow # for broadcasting dimensions like 'dayofyear' against 'time' - self._check_coords_compat(other) - ds = self._select_coords() - if hasattr(other, 'coordinates'): - ds.merge(other.coordinates, inplace=True) - other_array = getattr(other, 'variable', other) + if hasattr(other, 'name') or (self.name,) == self.dimensions: name = None else: name = self.name - ds[name] = (f(self.variable, other_array) - if not reflexive - else f(other_array, self.variable)) - return ds[name] + + if hasattr(other, 'coordinates'): + self, other = align(self, other, join='inner', copy=False) + + other_variable = getattr(other, 'variable', other) + var = (f(self.variable, other_variable) + if not reflexive + else f(other_variable, self.variable)) + + coords = 
list(self.coordinates.values()) + if hasattr(other, 'coordinates'): + for k, v in iteritems(other.coordinates): + if k not in self.coordinates: + coords.append(v) + + return type(self)(var._data, coords, var.dimensions, name, + fastpath=True) return func @staticmethod def _inplace_binary_op(f): @functools.wraps(f) def func(self, other): - self._check_coords_compat(other) - other_array = getattr(other, 'variable', other) - self.variable = f(self.variable, other_array) + if hasattr(other, 'coordinates'): + # self, other = align(self, other, join='left', copy=False) + other = other.reindex_like(self, copy=False) + + other_variable = getattr(other, 'variable', other) + self.variable = f(self.variable, other_variable) + if hasattr(other, 'coordinates'): self.dataset.merge(other.coordinates, inplace=True) return self @@ -875,15 +890,6 @@ def align(*objects, **kwargs): aligned : same as *objects Tuple of objects with aligned coordinates. """ - # TODO: automatically align when doing math with dataset arrays? - # TODO: change this to default to join='outer' like pandas? - if 'join' not in kwargs: - warnings.warn('using align without setting explicitly setting the ' - "'join' keyword argument. In future versions of xray, " - "the default will likely change from join='inner' to " - "join='outer', to match pandas.", - FutureWarning, stacklevel=2) - join = kwargs.pop('join', 'inner') copy = kwargs.pop('copy', True)