diff --git a/doc/_static/.gitignore b/doc/_static/.gitignore new file mode 100644 index 00000000000..50601326003 --- /dev/null +++ b/doc/_static/.gitignore @@ -0,0 +1 @@ +examples*.png \ No newline at end of file diff --git a/doc/api.rst b/doc/api.rst index decf40081e0..654fec41e9c 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -20,9 +20,9 @@ Attributes and underlying data .. autosummary:: :toctree: generated/ - Dataset.coordinates + Dataset.coords Dataset.noncoordinates - Dataset.dimensions + Dataset.dims Dataset.attrs Dataset contents @@ -98,7 +98,6 @@ IO / Conversion Dataset.to_netcdf Dataset.dumps - Dataset.dump_to_store Dataset.close Dataset.to_dataframe Dataset.from_dataframe @@ -147,7 +146,8 @@ Attributes and underlying data DataArray.values DataArray.as_index - DataArray.coordinates + DataArray.coords + DataArray.dims DataArray.name DataArray.dataset DataArray.attrs diff --git a/doc/examples.rst b/doc/examples.rst index 8f1caf4bd8b..44e989ed173 100644 --- a/doc/examples.rst +++ b/doc/examples.rst @@ -60,7 +60,7 @@ Monthly averaging with values given by the first date of the month in which each time falls. 
""" - time = xray_obj.coordinates['time'] + time = xray_obj.coords['time'] values = pd.Index(time).to_period('M').to_timestamp() return xray.DataArray(values, [time], name='year_month') @@ -74,7 +74,7 @@ Monthly averaging :suppress: def year_month(xray_obj): - time = xray_obj.coordinates['time'] + time = xray_obj.coords['time'] values = time.as_index.to_period('M').to_timestamp() return xray.DataArray(values, [time], name='year_month') @@ -83,7 +83,7 @@ Monthly averaging monthly_avg = ds.groupby(year_month(ds)).mean('time') - @savefig examples_tmin_tmax_plot2.png width=4in + @savefig examples_tmin_tmax_plot_mean.png width=4in monthly_avg.mean('x').to_dataframe().plot(style='s-') diff --git a/doc/faq.rst b/doc/faq.rst index 6eccee97699..4cbadaacafc 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -69,9 +69,9 @@ and coordinates, xray supports arbitrary metadata in the form of global Automatic interpretation of labels is powerful but also reduces flexibility. With xray, we draw a firm line between labels that the library understands -(``dimensions`` and ``coordinates``) and labels for users and user code -(``attrs``). For example, we do not automatically intrepret and enforce units -or `CF conventions`_. (An exception is serialization to netCDF with +(``dims`` and ``coords``) and labels for users and user code (``attrs``). For +example, we do not automatically interpret and enforce units or `CF +conventions`_. (An exception is serialization to netCDF with ``cf_conventions=True``.) .. _CF conventions: http://cf-pcmdi.llnl.gov/documents/cf-conventions/1.6/cf-conventions.html diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 934b5d49dac..eb8821c8b81 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -22,11 +22,11 @@ To get started, we will import numpy, pandas and xray: multi-dimensional array. 
It has three key properties: - ``values``: a numpy.ndarray holding the array's values -- ``dimensions``: names for each axis, e.g., ``('x', 'y', 'z')`` -- ``coordinates``: tick labels along each dimension, e.g., 1-dimensional arrays - of numbers, datetime objects or strings. +- ``dims``: dimension names for each axis, e.g., ``('x', 'y', 'z')`` +- ``coords``: tick labels along each dimension, e.g., 1-dimensional + arrays of numbers, datetime objects or strings. -xray uses ``dimensions`` and ``coordinates`` to enable its core metadata aware +xray uses ``dims`` and ``coords`` to enable its core metadata aware operations. Dimensions provide names that xray uses instead of the ``axis`` argument found in many numpy functions. Coordinates enable fast label based indexing and alignment, like the ``index`` found on a pandas @@ -48,8 +48,7 @@ a numpy ndarray), a list of coordinates labels and a list of dimension names: data = np.random.rand(4, 3) locs = ['IA', 'IL', 'IN'] times = pd.date_range('2000-01-01', periods=4) - foo = xray.DataArray(data, coordinates=[times, locs], - dimensions=['time', 'space']) + foo = xray.DataArray(data, coords=[times, locs], dims=['time', 'space']) foo All of these arguments (except for ``data``) are optional, and will be filled @@ -86,8 +85,8 @@ Let's take a look at the important properties on our array: .. ipython:: python foo.values - foo.dimensions - foo.coordinates + foo.dims + foo.coords foo.attrs print(foo.name) @@ -99,13 +98,13 @@ Now fill in some of that missing metadata: foo.attrs['units'] = 'meters' foo -The ``coordinates`` property is ``dict`` like. Individual coordinates can be +The ``coords`` property is ``dict`` like. Individual coordinates can be accessed by name or axis number: .. ipython:: python - foo.coordinates['time'] - foo.coordinates[0] + foo.coords['time'] + foo.coords[0] These are :py:class:`xray.Coordinate` objects, which contain tick-labels for each dimension. 
@@ -197,7 +196,7 @@ was filled with an array of ascending integers of the proper length: Noncoordinate and coordinates are listed explicitly by the :py:attr:`~xray.Dataset.noncoordinates` and -:py:attr:`~xray.Dataset.coordinates` attributes. +:py:attr:`~xray.Dataset.coords` attributes. There are also a few derived variables based on datetime coordinates that you can access from a dataset (e.g., "year", "month" and "day"), even if you didn't @@ -399,7 +398,7 @@ operation over any or all non-coordinates in a dataset by using Aggregation ~~~~~~~~~~~ -Aggregation methods from ndarray have been updated to take a `dimension` +Aggregation methods from ndarray have been updated to take a `dim` argument instead of `axis`. This allows for very intuitive syntax for aggregation methods that are applied along particular dimension(s): @@ -453,7 +452,7 @@ arrays with different sizes aligned along different dimensions: a = xray.DataArray([1, 2, 3, 4], [['a', 'b', 'c', 'd']], ['x']) a - b = xray.DataArray([-1, -2, -3], dimensions=['y']) + b = xray.DataArray([-1, -2, -3], dims=['y']) b With xray, we can apply binary mathematical operations to these arrays, and @@ -1112,7 +1111,7 @@ and DataArray variables. It supports the numpy ndarray interface, but is extended to support and use basic metadata (not including index values). It consists of: -1. ``dimensions``: A tuple of dimension names. +1. ``dims``: A tuple of dimension names. 2. ``values``: The N-dimensional array (for example, of type :py:class:`numpy.ndarray`) storing the array's data. It must have the same - number of dimensions as the length of ``dimensions``. + number of dimensions as the length of ``dims``. 
diff --git a/test/__init__.py b/test/__init__.py index 6b4c24dd9ba..f0e81887180 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -72,7 +72,7 @@ def assertVariableIdentical(self, v1, v2): assert as_variable(v1).identical(v2), (v1, v2) def assertVariableAllClose(self, v1, v2, rtol=1e-05, atol=1e-08): - self.assertEqual(v1.dimensions, v2.dimensions) + self.assertEqual(v1.dims, v2.dims) allclose = data_allclose_or_equiv( v1.values, v2.values, rtol=rtol, atol=atol) assert allclose, (v1.values, v2.values) @@ -113,10 +113,10 @@ def assertDatasetAllClose(self, d1, d2, rtol=1e-05, atol=1e-08): self.assertVariableAllClose(v1, v2, rtol=rtol, atol=atol) def assertCoordinatesEqual(self, d1, d2): - self.assertEqual(sorted(d1.coordinates), sorted(d2.coordinates)) - for k in d1.coordinates: - v1 = d1.coordinates[k] - v2 = d2.coordinates[k] + self.assertEqual(sorted(d1.coords), sorted(d2.coords)) + for k in d1.coords: + v1 = d1.coords[k] + v2 = d2.coords[k] self.assertVariableEqual(v1, v2) def assertDataArrayEqual(self, ar1, ar2): diff --git a/test/test_data_array.py b/test/test_data_array.py index adb5bf4f477..c494cae05a1 100644 --- a/test/test_data_array.py +++ b/test/test_data_array.py @@ -39,13 +39,13 @@ def test_properties(self): self.assertDatasetIdentical(self.dv.dataset, self.ds) self.assertVariableEqual(self.dv.variable, self.v) self.assertArrayEqual(self.dv.values, self.v.values) - for attr in ['dimensions', 'dtype', 'shape', 'size', 'ndim', 'attrs']: + for attr in ['dims', 'dtype', 'shape', 'size', 'ndim', 'attrs']: self.assertEqual(getattr(self.dv, attr), getattr(self.v, attr)) self.assertEqual(len(self.dv), len(self.v)) self.assertVariableEqual(self.dv, self.v) - self.assertEqual(list(self.dv.coordinates), list(self.ds.coordinates)) - for k, v in iteritems(self.dv.coordinates): - self.assertArrayEqual(v, self.ds.coordinates[k]) + self.assertEqual(list(self.dv.coords), list(self.ds.coords)) + for k, v in iteritems(self.dv.coords): + self.assertArrayEqual(v, 
self.ds.coords[k]) with self.assertRaises(AttributeError): self.dv.dataset = self.ds self.assertIsInstance(self.ds['x'].as_index, pd.Index) @@ -68,16 +68,16 @@ def test_name(self): expected = DataArray(Coordinate('y', [3])) self.assertDataArrayIdentical(actual, expected) - def test_dimensions(self): + def test_dims(self): arr = self.dv - self.assertEqual(arr.dimensions, ('x', 'y')) + self.assertEqual(arr.dims, ('x', 'y')) - arr.dimensions = ('w', 'z') - self.assertEqual(arr.dimensions, ('w', 'z')) + arr.dims = ('w', 'z') + self.assertEqual(arr.dims, ('w', 'z')) x = Dataset({'x': ('x', np.arange(5))})['x'] - x.dimensions = ('y',) - self.assertEqual(x.dimensions, ('y',)) + x.dims = ('y',) + self.assertEqual(x.dims, ('y',)) self.assertEqual(x.name, 'y') def test_encoding(self): @@ -110,25 +110,25 @@ def test_constructor(self): 'y': ('y', [-1, -2, -3])})[None] self.assertDataArrayIdentical(expected, actual) - coordinates = [['a', 'b'], [-1, -2, -3]] - actual = DataArray(data, coordinates, ['x', 'y']) + coords = [['a', 'b'], [-1, -2, -3]] + actual = DataArray(data, coords, ['x', 'y']) self.assertDataArrayIdentical(expected, actual) - coordinates = [pd.Index(['a', 'b'], name='A'), + coords = [pd.Index(['a', 'b'], name='A'), pd.Index([-1, -2, -3], name='B')] - actual = DataArray(data, coordinates, ['x', 'y']) + actual = DataArray(data, coords, ['x', 'y']) self.assertDataArrayIdentical(expected, actual) - coordinates = {'x': ['a', 'b'], 'y': [-1, -2, -3]} - actual = DataArray(data, coordinates, ['x', 'y']) + coords = {'x': ['a', 'b'], 'y': [-1, -2, -3]} + actual = DataArray(data, coords, ['x', 'y']) self.assertDataArrayIdentical(expected, actual) - coordinates = OrderedDict([('x', ['a', 'b']), ('y', [-1, -2, -3])]) - actual = DataArray(data, coordinates) + coords = OrderedDict([('x', ['a', 'b']), ('y', [-1, -2, -3])]) + actual = DataArray(data, coords) self.assertDataArrayIdentical(expected, actual) - coordinates = pd.Series([['a', 'b'], [-1, -2, -3]], ['x', 'y']) - actual 
= DataArray(data, coordinates) + coords = pd.Series([['a', 'b'], [-1, -2, -3]], ['x', 'y']) + actual = DataArray(data, coords) self.assertDataArrayIdentical(expected, actual) expected = Dataset({None: (['x', 'y'], data), @@ -145,35 +145,35 @@ def test_constructor(self): with self.assertRaisesRegexp(ValueError, 'must have the same length'): DataArray(data, {'x': [0, 1, 2]}) - actual = DataArray(data, dimensions=['x', 'y']) + actual = DataArray(data, dims=['x', 'y']) expected = Dataset({None: (['x', 'y'], data)})[None] self.assertDataArrayIdentical(expected, actual) - actual = DataArray(data, dimensions=['x', 'y'], name='foo') + actual = DataArray(data, dims=['x', 'y'], name='foo') expected = Dataset({'foo': (['x', 'y'], data)})['foo'] self.assertDataArrayIdentical(expected, actual) with self.assertRaisesRegexp(TypeError, 'is not a string'): - DataArray(data, dimensions=['x', None]) + DataArray(data, dims=['x', None]) actual = DataArray(data, name='foo') expected = Dataset({'foo': (['dim_0', 'dim_1'], data)})['foo'] self.assertDataArrayIdentical(expected, actual) - actual = DataArray(data, dimensions=['x', 'y'], attributes={'bar': 2}) + actual = DataArray(data, dims=['x', 'y'], attrs={'bar': 2}) expected = Dataset({None: (['x', 'y'], data, {'bar': 2})})[None] self.assertDataArrayIdentical(expected, actual) - actual = DataArray(data, dimensions=['x', 'y'], encoding={'bar': 2}) + actual = DataArray(data, dims=['x', 'y'], encoding={'bar': 2}) expected = Dataset({None: (['x', 'y'], data, {}, {'bar': 2})})[None] self.assertDataArrayIdentical(expected, actual) def test_constructor_from_self_described(self): data = [[-0.1, 21], [0, 2]] expected = DataArray(data, - coordinates={'x': ['a', 'b'], 'y': [-1, -2]}, - dimensions=['x', 'y'], name='foobar', - attributes={'bar': 2}, encoding={'foo': 3}) + coords={'x': ['a', 'b'], 'y': [-1, -2]}, + dims=['x', 'y'], name='foobar', + attrs={'bar': 2}, encoding={'foo': 3}) actual = DataArray(expected) 
self.assertDataArrayIdentical(expected, actual) @@ -188,7 +188,7 @@ def test_constructor_from_self_described(self): panel = pd.Panel({0: frame}) actual = DataArray(panel) - expected = DataArray([data], expected.coordinates, ['dim_0', 'x', 'y']) + expected = DataArray([data], expected.coords, ['dim_0', 'x', 'y']) self.assertDataArrayIdentical(expected, actual) expected = Dataset({'foo': ('foo', ['a', 'b'])})['foo'] @@ -285,80 +285,80 @@ def test_loc(self): self.assertTrue(np.all(da.values == 0)) def test_loc_single_boolean(self): - data = DataArray([0, 1], coordinates=[[True, False]]) + data = DataArray([0, 1], coords=[[True, False]]) self.assertEqual(data.loc[True], 0) self.assertEqual(data.loc[False], 1) - def test_coordinates(self): - coordinates = [Coordinate('x', [-1, -2]), Coordinate('y', [0, 1, 2])] - da = DataArray(np.random.randn(2, 3), coordinates, name='foo') + def test_coords(self): + coords = [Coordinate('x', [-1, -2]), Coordinate('y', [0, 1, 2])] + da = DataArray(np.random.randn(2, 3), coords, name='foo') - self.assertEquals(2, len(da.coordinates)) + self.assertEquals(2, len(da.coords)) - self.assertEquals(['x', 'y'], list(da.coordinates)) + self.assertEquals(['x', 'y'], list(da.coords)) - self.assertTrue(da.coordinates[0].identical(coordinates[0])) - self.assertTrue(da.coordinates['x'].identical(coordinates[0])) - self.assertTrue(da.coordinates[1].identical(coordinates[1])) - self.assertTrue(da.coordinates['y'].identical(coordinates[1])) + self.assertTrue(da.coords[0].identical(coords[0])) + self.assertTrue(da.coords['x'].identical(coords[0])) + self.assertTrue(da.coords[1].identical(coords[1])) + self.assertTrue(da.coords['y'].identical(coords[1])) - self.assertIn('x', da.coordinates) - self.assertNotIn(0, da.coordinates) - self.assertNotIn('foo', da.coordinates) + self.assertIn('x', da.coords) + self.assertNotIn(0, da.coords) + self.assertNotIn('foo', da.coords) with self.assertRaises(KeyError): - da.coordinates['foo'] + da.coords['foo'] expected = 
dedent("""\ x: Int64Index([-1, -2], dtype='int64') y: Int64Index([0, 1, 2], dtype='int64')""") - actual = repr(da.coordinates) + actual = repr(da.coords) self.assertEquals(expected, actual) - def test_coordinates_modify(self): - da = DataArray(np.zeros((2, 3)), dimensions=['x', 'y']) + def test_coords_modify(self): + da = DataArray(np.zeros((2, 3)), dims=['x', 'y']) for k, v in [('x', ['a', 'b']), (0, ['c', 'd']), (-2, ['e', 'f'])]: - da.coordinates[k] = v - self.assertArrayEqual(da.coordinates[k], v) + da.coords[k] = v + self.assertArrayEqual(da.coords[k], v) actual = da.copy() orig_dataset = actual.dataset - actual.coordinates = [[5, 6], [7, 8, 9]] - expected = DataArray(np.zeros((2, 3)), coordinates=[[5, 6], [7, 8, 9]], - dimensions=['x', 'y']) + actual.coords = [[5, 6], [7, 8, 9]] + expected = DataArray(np.zeros((2, 3)), coords=[[5, 6], [7, 8, 9]], + dims=['x', 'y']) self.assertDataArrayIdentical(actual, expected) self.assertIsNot(actual.dataset, orig_dataset) actual = da.copy() - actual.coordinates = expected.coordinates + actual.coords = expected.coords self.assertDataArrayIdentical(actual, expected) actual = da.copy() - expected = DataArray(np.zeros((2, 3)), coordinates=[[5, 6], [7, 8, 9]], - dimensions=['foo', 'bar']) - actual.coordinates = expected.coordinates + expected = DataArray(np.zeros((2, 3)), coords=[[5, 6], [7, 8, 9]], + dims=['foo', 'bar']) + actual.coords = expected.coords self.assertDataArrayIdentical(actual, expected) with self.assertRaisesRegexp(ValueError, 'coordinate has size'): - da.coordinates['x'] = ['a'] + da.coords['x'] = ['a'] with self.assertRaises(IndexError): - da.coordinates['foobar'] = np.arange(4) + da.coords['foobar'] = np.arange(4) with self.assertRaisesRegexp(ValueError, 'coordinate has size'): - da.coordinates = da.isel(y=slice(2)).coordinates + da.coords = da.isel(y=slice(2)).coords # modify the coordinates on a coordinate itself x = DataArray(Coordinate('x', [10.0, 20.0, 30.0])) actual = x.copy() - actual.coordinates = 
[[0, 1, 2]] + actual.coords = [[0, 1, 2]] expected = DataArray(Coordinate('x', range(3))) self.assertDataArrayIdentical(actual, expected) actual = DataArray(Coordinate('y', [-10, -20, -30])) - actual.coordinates = expected.coordinates + actual.coords = expected.coords self.assertDataArrayIdentical(actual, expected) def test_reindex(self): @@ -565,7 +565,7 @@ def test_groupby_properties(self): def test_groupby_apply_identity(self): expected = self.make_groupby_example_array() - idx = expected.coordinates['y'] + idx = expected.coords['y'] identity = lambda x: x for g in ['x', 'y', 'abc', idx]: for shortcut in [False, True]: diff --git a/test/test_dataset.py b/test/test_dataset.py index e5247d91868..19356009685 100644 --- a/test/test_dataset.py +++ b/test/test_dataset.py @@ -51,7 +51,7 @@ def __getitem__(self, key): class InaccessibleVariableDataStore(backends.InMemoryDataStore): def __init__(self): - self.dimensions = OrderedDict() + self.dims = OrderedDict() self._variables = OrderedDict() self.attrs = OrderedDict() @@ -61,7 +61,7 @@ def set_variable(self, name, variable): def open_store_variable(self, var): data = indexing.LazilyIndexedArray(InaccessibleArray(var.values)) - return Variable(var.dimensions, data, var.attrs) + return Variable(var.dims, data, var.attrs) @property def store_variables(self): @@ -130,18 +130,18 @@ def test_variable(self): with self.assertRaises(ValueError): a['qux'] = (('time', 'x'), d.T) - def test_coordinates_create(self): + def test_coords_create(self): a = Dataset() vec = np.random.random((10,)) attributes = {'foo': 'bar'} a['x'] = ('x', vec, attributes) - self.assertTrue('x' in a.coordinates) - self.assertIsInstance(a.coordinates['x'].as_index, pd.Index) - self.assertVariableIdentical(a.coordinates['x'], a.variables['x']) + self.assertTrue('x' in a.coords) + self.assertIsInstance(a.coords['x'].as_index, pd.Index) + self.assertVariableIdentical(a.coords['x'], a.variables['x']) b = Dataset() b['x'] = ('x', vec, attributes) 
self.assertVariableIdentical(a['x'], b['x']) - self.assertEqual(a.dimensions, b.dimensions) + self.assertEqual(a.dims, b.dims) # this should work a['x'] = ('x', vec[:5]) a['z'] = ('x', np.arange(5)) @@ -154,50 +154,50 @@ def test_coordinates_create(self): a['y'] = ('y', arr) with self.assertRaises(ValueError): a['y'] = ('y', scal) - self.assertTrue('y' not in a.dimensions) + self.assertTrue('y' not in a.dims) - def test_coordinates_properties(self): + def test_coords_properties(self): data = Dataset({'x': ('x', [-1, -2]), 'y': ('y', [0, 1, 2]), 'foo': (['x', 'y'], np.random.randn(2, 3))}) - self.assertEquals(2, len(data.coordinates)) + self.assertEquals(2, len(data.coords)) - self.assertEquals(set(['x', 'y']), set(data.coordinates)) + self.assertEquals(set(['x', 'y']), set(data.coords)) - self.assertVariableIdentical(data.coordinates['x'], data['x'].variable) - self.assertVariableIdentical(data.coordinates['y'], data['y'].variable) + self.assertVariableIdentical(data.coords['x'], data['x'].variable) + self.assertVariableIdentical(data.coords['y'], data['y'].variable) - self.assertIn('x', data.coordinates) - self.assertNotIn(0, data.coordinates) - self.assertNotIn('foo', data.coordinates) + self.assertIn('x', data.coords) + self.assertNotIn(0, data.coords) + self.assertNotIn('foo', data.coords) with self.assertRaises(KeyError): - data.coordinates['foo'] + data.coords['foo'] with self.assertRaises(KeyError): - data.coordinates[0] + data.coords[0] expected = dedent("""\ x: Int64Index([-1, -2], dtype='int64') y: Int64Index([0, 1, 2], dtype='int64')""") - actual = repr(data.coordinates) + actual = repr(data.coords) self.assertEquals(expected, actual) - def test_coordinates_modify(self): + def test_coords_modify(self): data = Dataset({'x': ('x', [-1, -2]), 'y': ('y', [0, 1, 2]), 'foo': (['x', 'y'], np.random.randn(2, 3))}) actual = data.copy(deep=True) - actual.coordinates['x'] = ['a', 'b'] + actual.coords['x'] = ['a', 'b'] self.assertArrayEqual(actual['x'], ['a', 'b']) 
actual = data.copy(deep=True) - actual.coordinates['z'] = ['a', 'b'] + actual.coords['z'] = ['a', 'b'] self.assertArrayEqual(actual['z'], ['a', 'b']) with self.assertRaisesRegexp(ValueError, 'coordinate has size'): - data.coordinates['x'] = [-1] + data.coords['x'] = [-1] def test_equals_and_identical(self): data = create_test_data(seed=42) @@ -230,21 +230,21 @@ def test_isel(self): ret = data.isel(**slicers) # Verify that only the specified dimension was altered - self.assertItemsEqual(data.dimensions, ret.dimensions) - for d in data.dimensions: + self.assertItemsEqual(data.dims, ret.dims) + for d in data.dims: if d in slicers: - self.assertEqual(ret.dimensions[d], - np.arange(data.dimensions[d])[slicers[d]].size) + self.assertEqual(ret.dims[d], + np.arange(data.dims[d])[slicers[d]].size) else: - self.assertEqual(data.dimensions[d], ret.dimensions[d]) + self.assertEqual(data.dims[d], ret.dims[d]) # Verify that the data is what we expect for v in data.variables: - self.assertEqual(data[v].dimensions, ret[v].dimensions) + self.assertEqual(data[v].dims, ret[v].dims) self.assertEqual(data[v].attrs, ret[v].attrs) slice_list = [slice(None)] * data[v].values.ndim for d, s in iteritems(slicers): - if d in data[v].dimensions: - inds = np.nonzero(np.array(data[v].dimensions) == d)[0] + if d in data[v].dims: + inds = np.nonzero(np.array(data[v].dims) == d)[0] for ind in inds: slice_list[ind] = s expected = data[v].values[slice_list] @@ -255,17 +255,17 @@ def test_isel(self): data.isel(not_a_dim=slice(0, 2)) ret = data.isel(dim1=0) - self.assertEqual({'time': 20, 'dim2': 50, 'dim3': 10}, ret.dimensions) + self.assertEqual({'time': 20, 'dim2': 50, 'dim3': 10}, ret.dims) self.assertItemsEqual(list(data.noncoordinates) + ['dim1'], ret.noncoordinates) ret = data.isel(time=slice(2), dim1=0, dim2=slice(5)) - self.assertEqual({'time': 2, 'dim2': 5, 'dim3': 10}, ret.dimensions) + self.assertEqual({'time': 2, 'dim2': 5, 'dim3': 10}, ret.dims) 
self.assertItemsEqual(list(data.noncoordinates) + ['dim1'], ret.noncoordinates) ret = data.isel(time=0, dim1=0, dim2=slice(5)) - self.assertItemsEqual({'dim2': 5, 'dim3': 10}, ret.dimensions) + self.assertItemsEqual({'dim2': 5, 'dim3': 10}, ret.dims) self.assertItemsEqual(list(data.noncoordinates) + ['dim1', 'time'], ret.noncoordinates) @@ -424,7 +424,7 @@ def test_rename(self): variables[v] = variables.pop(k) for k, v in iteritems(variables): - dims = list(v.dimensions) + dims = list(v.dims) for name, newname in iteritems(newnames): if name in dims: dims[dims.index(name)] = newname @@ -442,7 +442,7 @@ def test_rename(self): # verify that we can rename a variable without accessing the data var1 = data['var1'] - data['var1'] = (var1.dimensions, InaccessibleArray(var1.values)) + data['var1'] = (var1.dims, InaccessibleArray(var1.values)) renamed = data.rename(newnames) with self.assertRaises(UnexpectedDataAccess): renamed['renamed_var1'].values @@ -455,7 +455,7 @@ def test_rename_inplace(self): data.rename({'x': 'y'}, inplace=True) self.assertDatasetIdentical(data, renamed) self.assertFalse(data.equals(copied)) - self.assertEquals(data.dimensions, {'y': 3, 't': 3}) + self.assertEquals(data.dims, {'y': 3, 't': 3}) # check virtual variables self.assertArrayEqual(data['t.dayofyear'], [1, 2, 3]) @@ -557,7 +557,7 @@ def test_squeeze(self): data = Dataset({'foo': (['x', 'y', 'z'], [[[1], [2]]])}) for args in [[], [['x']], [['x', 'z']]]: def get_args(v): - return [set(args[0]) & set(v.dimensions)] if args else [] + return [set(args[0]) & set(v.dims)] if args else [] expected = Dataset(dict((k, v.squeeze(*get_args(v))) for k, v in iteritems(data.variables))) self.assertDatasetIdentical(expected, data.squeeze(*args)) @@ -599,8 +599,8 @@ def test_groupby_errors(self): data.groupby(np.arange(10)) with self.assertRaisesRegexp(ValueError, 'length does not match'): data.groupby(data['dim1'][:3]) - with self.assertRaisesRegexp(ValueError, "must have a 'dimensions'"): - 
data.groupby(data.coordinates['dim1'].as_index) + with self.assertRaisesRegexp(ValueError, "must have a 'dims'"): + data.groupby(data.coords['dim1'].as_index) def test_groupby_reduce(self): data = Dataset({'xy': (['x', 'y'], np.random.randn(3, 4)), @@ -631,7 +631,7 @@ def test_concat(self): def rectify_dim_order(dataset): # return a new dataset with all variable dimensions tranposed into # the order in which they are found in `data` - return Dataset(dict((k, v.transpose(*data[k].dimensions)) + return Dataset(dict((k, v.transpose(*data[k].dims)) for k, v in iteritems(dataset.variables)), dataset.attrs) @@ -645,7 +645,7 @@ def rectify_dim_order(dataset): datasets = [g for _, g in data.groupby(dim, squeeze=True)] concat_over = [k for k, v in iteritems(data.variables) - if dim in v.dimensions and k != dim] + if dim in v.dims and k != dim] actual = Dataset.concat(datasets, data[dim], concat_over=concat_over) self.assertDatasetIdentical(data, rectify_dim_order(actual)) @@ -655,12 +655,12 @@ def rectify_dim_order(dataset): # Now add a new variable that doesn't depend on any of the current # dims and make sure the mode argument behaves as expected - data['var4'] = ('dim4', np.arange(data.dimensions['dim3'])) + data['var4'] = ('dim4', np.arange(data.dims['dim3'])) for dim in ['dim1', 'dim2', 'dim3']: datasets = [g for _, g in data.groupby(dim, squeeze=False)] actual = Dataset.concat(datasets, data[dim], mode='all') expected = np.array([data['var4'].values - for _ in range(data.dimensions[dim])]) + for _ in range(data.dims[dim])]) self.assertArrayEqual(actual['var4'].values, expected) actual = Dataset.concat(datasets, data[dim], mode='different') @@ -668,14 +668,13 @@ def rectify_dim_order(dataset): actual = Dataset.concat(datasets, data[dim], mode='minimal') self.assertDataArrayEqual(data['var4'], actual['var4']) - # verify that the dimension argument takes precedence over + # verify that the dim argument takes precedence over # concatenating dataset variables of the same 
name - dimension = (2 * data['dim1']).rename('dim1') + dim = (2 * data['dim1']).rename('dim1') datasets = [g for _, g in data.groupby('dim1', squeeze=False)] expected = data.copy() - expected['dim1'] = dimension - self.assertDatasetIdentical( - expected, Dataset.concat(datasets, dimension)) + expected['dim1'] = dim + self.assertDatasetIdentical(expected, Dataset.concat(datasets, dim)) # TODO: factor this into several distinct tests data = create_test_data() @@ -741,7 +740,7 @@ def test_pickle(self): roundtripped = pickle.loads(pickle.dumps(data)) self.assertDatasetIdentical(data, roundtripped) # regression test for #167: - self.assertEqual(data.dimensions, roundtripped.dimensions) + self.assertEqual(data.dims, roundtripped.dims) def test_lazy_load(self): store = InaccessibleVariableDataStore() @@ -761,7 +760,7 @@ def test_lazy_load(self): def test_reduce(self): data = create_test_data() - self.assertEqual(len(data.mean().coordinates), 0) + self.assertEqual(len(data.mean().coords), 0) expected = data.max() for var in data.noncoordinates: @@ -769,23 +768,23 @@ def test_reduce(self): actual = expected[var] self.assertDataArrayEqual(expected, actual) - self.assertDatasetEqual(data.min(dimension=['dim1']), - data.min(dimension='dim1')) + self.assertDatasetEqual(data.min(dim=['dim1']), + data.min(dim='dim1')) for reduct, expected in [('dim2', ['dim1', 'dim3', 'time']), (['dim2', 'time'], ['dim1', 'dim3']), (('dim2', 'time'), ['dim1', 'dim3']), ((), ['dim1', 'dim2', 'dim3', 'time'])]: - actual = data.min(dimension=reduct).dimensions + actual = data.min(dim=reduct).dims print(reduct, actual, expected) self.assertItemsEqual(actual, expected) - self.assertDatasetEqual(data.mean(dimension=[]), data) + self.assertDatasetEqual(data.mean(dim=[]), data) - def test_reduce_bad_dimension(self): + def test_reduce_bad_dim(self): data = create_test_data() with self.assertRaisesRegexp(ValueError, 'Dataset does not contain'): - ds = data.mean(dimension='bad_dim') + ds = 
data.mean(dim='bad_dim') def test_reduce_non_numeric(self): data1 = create_test_data(seed=44) @@ -798,8 +797,8 @@ def test_reduce_non_numeric(self): self.assertTrue('var4' not in data1.mean()) self.assertDatasetEqual(data1.mean(), data2.mean()) - self.assertDatasetEqual(data1.mean(dimension='dim1'), - data2.mean(dimension='dim1')) + self.assertDatasetEqual(data1.mean(dim='dim1'), + data2.mean(dim='dim1')) def test_reduce_keep_attrs(self): data = create_test_data() diff --git a/test/test_variable.py b/test/test_variable.py index a2c69f93508..d58305dc5d6 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -18,7 +18,7 @@ class VariableSubclassTestCases(object): def test_properties(self): data = 0.5 * np.arange(10) v = self.cls(['time'], data, {'foo': 'bar'}) - self.assertEqual(v.dimensions, ('time',)) + self.assertEqual(v.dims, ('time',)) self.assertArrayEqual(v.values, data) self.assertEqual(v.dtype, float) self.assertEqual(v.shape, (10,)) @@ -339,9 +339,9 @@ def test_equals_and_identical(self): d = np.random.rand(10, 3) d[0, 0] = np.nan v1 = Variable(('dim1', 'dim2'), data=d, - attributes={'att1': 3, 'att2': [1, 2, 3]}) + attrs={'att1': 3, 'att2': [1, 2, 3]}) v2 = Variable(('dim1', 'dim2'), data=d, - attributes={'att1': 3, 'att2': [1, 2, 3]}) + attrs={'att1': 3, 'att2': [1, 2, 3]}) self.assertTrue(v1.equals(v2)) self.assertTrue(v1.identical(v2)) @@ -374,11 +374,11 @@ def test_as_variable(self): self.assertIsInstance(as_variable(ds['x']), Variable) self.assertIsInstance(as_variable(ds['x'], strict=False), DataArray) - FakeVariable = namedtuple('FakeVariable', 'values dimensions') - fake_xarray = FakeVariable(expected.values, expected.dimensions) + FakeVariable = namedtuple('FakeVariable', 'values dims') + fake_xarray = FakeVariable(expected.values, expected.dims) self.assertVariableIdentical(expected, as_variable(fake_xarray)) - xarray_tuple = (expected.dimensions, expected.values) + xarray_tuple = (expected.dims, expected.values) 
self.assertVariableIdentical(expected, as_variable(xarray_tuple)) with self.assertRaisesRegexp(TypeError, 'cannot convert numpy'): @@ -537,7 +537,7 @@ def test_reduce(self): self.assertVariableIdentical(v.reduce(np.std, 'x'), Variable(['y'], self.d.std(axis=0))) self.assertVariableIdentical(v.reduce(np.std, axis=0), - v.reduce(np.std, dimension='x')) + v.reduce(np.std, dim='x')) self.assertVariableIdentical(v.reduce(np.std, ['y', 'x']), Variable([], self.d.std(axis=(0, 1)))) self.assertVariableIdentical(v.reduce(np.std), @@ -548,7 +548,7 @@ def test_reduce(self): self.assertVariableIdentical(v.mean('x'), v.reduce(np.mean, 'x')) with self.assertRaisesRegexp(ValueError, 'cannot supply both'): - v.mean(dimension='x', axis=0) + v.mean(dim='x', axis=0) def test_reduce_keep_attrs(self): _attrs = {'units': 'test', 'long_name': 'testing'} diff --git a/xray/backends/common.py b/xray/backends/common.py index 0d83b63ab66..84816f692e7 100644 --- a/xray/backends/common.py +++ b/xray/backends/common.py @@ -60,6 +60,6 @@ def set_variables(self, variables): self.set_variable(_encode_variable_name(vn), v) def set_necessary_dimensions(self, variable): - for d, l in zip(variable.dimensions, variable.shape): + for d, l in zip(variable.dims, variable.shape): if d not in self.ds.dimensions: self.set_dimension(d, l) diff --git a/xray/backends/netCDF4_.py b/xray/backends/netCDF4_.py index cb1d6b5caea..90c210b3ca6 100644 --- a/xray/backends/netCDF4_.py +++ b/xray/backends/netCDF4_.py @@ -171,7 +171,7 @@ def set_variable(self, name, variable): nc4_var = self.ds.createVariable( varname=name, datatype=datatype, - dimensions=variable.dimensions, + dimensions=variable.dims, zlib=encoding.get('zlib', False), complevel=encoding.get('complevel', 4), shuffle=encoding.get('shuffle', True), diff --git a/xray/backends/netcdf3.py b/xray/backends/netcdf3.py index 4abb97bbc09..78071b7647a 100644 --- a/xray/backends/netcdf3.py +++ b/xray/backends/netcdf3.py @@ -52,7 +52,7 @@ def coerce_nc3_dtype(arr): def 
encode_nc3_variable(var): - dimensions = var.dimensions + dimensions = var.dims data = coerce_nc3_dtype(var.values) if data.dtype.kind == 'S' and data.dtype.itemsize > 1: data = conventions.string_to_char(data) diff --git a/xray/backends/scipy_.py b/xray/backends/scipy_.py index ed63217fc36..20d3278a848 100644 --- a/xray/backends/scipy_.py +++ b/xray/backends/scipy_.py @@ -95,7 +95,7 @@ def set_variable(self, name, variable): conventions.encode_cf_variable(variable)) self.set_necessary_dimensions(variable) data = variable.values - self.ds.createVariable(name, data.dtype, variable.dimensions) + self.ds.createVariable(name, data.dtype, variable.dims) scipy_var = self.ds.variables[name] if data.ndim == 0: scipy_var.assignValue(data) diff --git a/xray/common.py b/xray/common.py index 378ce95aa8d..4419fe1116e 100644 --- a/xray/common.py +++ b/xray/common.py @@ -3,21 +3,22 @@ import numpy as np from .pycompat import basestring, iteritems +from . import utils class ImplementsArrayReduce(object): @classmethod def _reduce_method(cls, func): - def wrapped_func(self, dimension=None, axis=None, keep_attrs=False, + def wrapped_func(self, dim=None, axis=None, keep_attrs=False, **kwargs): - return self.reduce(func, dimension, axis, keep_attrs, **kwargs) + return self.reduce(func, dim, axis, keep_attrs, **kwargs) return wrapped_func _reduce_extra_args_docstring = \ - """dimension : str or sequence of str, optional + """dim : str or sequence of str, optional Dimension(s) over which to apply `{name}`. axis : int or sequence of int, optional - Axis(es) over which to apply `{name}`. Only one of the 'dimension' + Axis(es) over which to apply `{name}`. Only one of the 'dim' and 'axis' arguments can be supplied. 
If neither are supplied, then `{name}` is calculated over axes.\n""" @@ -25,12 +26,12 @@ def wrapped_func(self, dimension=None, axis=None, keep_attrs=False, class ImplementsDatasetReduce(object): @classmethod def _reduce_method(cls, func): - def wrapped_func(self, dimension=None, keep_attrs=False, **kwargs): - return self.reduce(func, dimension, keep_attrs, **kwargs) + def wrapped_func(self, dim=None, keep_attrs=False, **kwargs): + return self.reduce(func, dim, keep_attrs, **kwargs) return wrapped_func _reduce_extra_args_docstring = \ - """dimension : str or sequence of str, optional + """dim : str or sequence of str, optional Dimension(s) over which to apply `func`. By default `func` is applied over all dimensions.\n""" @@ -73,12 +74,12 @@ def __iter__(self): def T(self): return self.transpose() - def get_axis_num(self, dimension): + def get_axis_num(self, dim): """Return axis number(s) corresponding to dimension(s) in this array. Parameters ---------- - dimension : str or iterable of str + dim : str or iterable of str Dimension name(s) for which to lookup axes. Returns @@ -86,17 +87,17 @@ def get_axis_num(self, dimension): int or tuple of int Axis number or numbers corresponding to the given dimensions. 
""" - if isinstance(dimension, basestring): - return self._get_axis_num(dimension) + if isinstance(dim, basestring): + return self._get_axis_num(dim) else: - return tuple(self._get_axis_num(dim) for dim in dimension) + return tuple(self._get_axis_num(d) for d in dim) def _get_axis_num(self, dim): try: - return self.dimensions.index(dim) + return self.dims.index(dim) except ValueError: raise ValueError("%r not found in array dimensions %r" % - (dim, self.dimensions)) + (dim, self.dims)) class AbstractCoordinates(Mapping): @@ -107,13 +108,13 @@ def __getitem__(self, key): raise NotImplementedError def __iter__(self): - return iter(self._data.dimensions) + return iter(self._data.dims) def __len__(self): - return len(self._data.dimensions) + return len(self._data.dims) def __contains__(self, key): - return key in self._data.dimensions + return key in self._data.dims def __repr__(self): return '\n'.join(_wrap_indent(repr(v.as_index), '%s: ' % k) @@ -156,18 +157,18 @@ def array_repr(arr): else: name_str = '' dim_summary = ', '.join('%s: %s' % (k, v) for k, v - in zip(arr.dimensions, arr.shape)) + in zip(arr.dims, arr.shape)) summary = [''% (type(arr).__name__, name_str, dim_summary)] if arr.size < 1e5 or arr._in_memory(): summary.append(repr(arr.values)) else: summary.append('[%s values with dtype=%s]' % (arr.size, arr.dtype)) if hasattr(arr, 'dataset'): - if arr.coordinates: + if arr.coords: summary.append('Coordinates:') - summary.append(_wrap_indent(repr(arr.coordinates), ' ')) + summary.append(_wrap_indent(repr(arr.coords), ' ')) other_vars = [k for k in arr.dataset - if k not in arr.coordinates and k != arr.name] + if k not in arr.coords and k != arr.name] if other_vars: summary.append('Linked dataset variables:') summary.append(' ' + ', '.join(other_vars)) @@ -193,18 +194,18 @@ def dataset_repr(ds): max_name_length = max(len(k) for k in ds.variables) if ds else 0 first_col_width = max(4 + max_name_length, 16) coords_str = pretty_print('Dimensions:', 
first_col_width) - all_dim_strings = ['%s: %s' % (k, v) for k, v in iteritems(ds.dimensions)] + all_dim_strings = ['%s: %s' % (k, v) for k, v in iteritems(ds.dims)] summary.append('%s(%s)' % (coords_str, ', '.join(all_dim_strings))) def summarize_var(k, not_found=' ', found=int): v = ds.variables[k] dim_strs = [] - for n, d in enumerate(ds.dimensions): + for n, d in enumerate(ds.dims): length = len(all_dim_strings[n]) prepend = ' ' * (length // 2) - if d in v.dimensions: + if d in v.dims: if found is int: - indicator = str(v.dimensions.index(d)) + indicator = str(v.dims.index(d)) else: indicator = found else: @@ -221,7 +222,7 @@ def summarize_variables(variables, not_found=' ', found=int): return [' None'] summary.append('Coordinates:') - summary.extend(summarize_variables(ds.coordinates, ' ', 'X')) + summary.extend(summarize_variables(ds.coords, ' ', 'X')) summary.append('Noncoordinates:') summary.extend(summarize_variables(ds.noncoordinates, ' ', int)) diff --git a/xray/conventions.py b/xray/conventions.py index 1e1a78d2f07..2607dcbe2ca 100644 --- a/xray/conventions.py +++ b/xray/conventions.py @@ -394,7 +394,7 @@ def encode_cf_variable(var): """Converts an Variable into an Variable suitable for saving as a netCDF variable """ - dimensions = var.dimensions + dimensions = var.dims data = var.values attributes = var.attrs.copy() encoding = var.encoding.copy() @@ -472,7 +472,7 @@ def decode_cf_variable(var, concat_characters=True, mask_and_scale=True, # use _data instead of data so as not to trigger loading data var = xray.variable.as_variable(var) data = var._data - dimensions = var.dimensions + dimensions = var.dims attributes = var.attrs.copy() encoding = var.encoding.copy() @@ -515,7 +515,7 @@ def decode_cf_variables(variables, concat_characters=True, mask_and_scale=True, """ dimensions_used_by = defaultdict(list) for v in variables.values(): - for d in v.dimensions: + for d in v.dims: dimensions_used_by[d].append(v) def stackable(dim): @@ -523,14 +523,14 @@ def 
stackable(dim): if dim in variables: return False for v in dimensions_used_by[dim]: - if v.dtype.kind != 'S' or dim != v.dimensions[-1]: + if v.dtype.kind != 'S' or dim != v.dims[-1]: return False return True new_vars = OrderedDict() for k, v in iteritems(variables): concat = (concat_characters and v.dtype.kind == 'S' and v.ndim > 0 and - stackable(v.dimensions[-1])) + stackable(v.dims[-1])) new_vars[k] = decode_cf_variable( v, concat_characters=concat, mask_and_scale=mask_and_scale, decode_times=decode_times) diff --git a/xray/data_array.py b/xray/data_array.py index 3bcf961239e..69ad1ec1f48 100644 --- a/xray/data_array.py +++ b/xray/data_array.py @@ -22,44 +22,44 @@ def _is_dict_like(value): return hasattr(value, '__getitem__') and hasattr(value, 'keys') -def _infer_coordinates_and_dimensions(shape, coords, dimensions): +def _infer_coords_and_dims(shape, coords, dims): """All the logic for creating a new DataArray""" - if isinstance(dimensions, basestring): - dimensions = [dimensions] + if isinstance(dims, basestring): + dims = [dims] if _is_dict_like(coords): - if dimensions is None: - dimensions = list(coords.keys()) + if dims is None: + dims = list(coords.keys()) else: - bad_coords = [dim for dim in coords if dim not in dimensions] + bad_coords = [dim for dim in coords if dim not in dims] if bad_coords: raise ValueError('coordinates %r are not array dimensions' % bad_coords) - coords = [coords.get(d, None) for d in dimensions] + coords = [coords.get(d, None) for d in dims] elif coords is not None and len(coords) != len(shape): raise ValueError('%s coordinates supplied but data has ndim=%s' % (len(coords), len(shape))) - if dimensions is None: - dimensions = ['dim_%s' % n for n in range(len(shape))] + if dims is None: + dims = ['dim_%s' % n for n in range(len(shape))] if coords is not None: for n, idx in enumerate(coords): if hasattr(idx, 'name') and idx.name is not None: - dimensions[n] = idx.name + dims[n] = idx.name else: - for d in dimensions: + for d in 
dims: if not isinstance(d, basestring): raise TypeError('dimension %s is not a string' % d) if coords is None: coords = [None] * len(shape) coords = [idx if isinstance(idx, AbstractArray) else - variable.Coordinate(dimensions[n], idx) if idx is not None else - variable.Coordinate(dimensions[n], np.arange(shape[n])) + variable.Coordinate(dims[n], idx) if idx is not None else + variable.Coordinate(dims[n], np.arange(shape[n])) for n, idx in enumerate(coords)] - return coords, dimensions + return coords, dims class _LocIndexer(object): @@ -70,7 +70,7 @@ def _remap_key(self, key): label_indexers = self.data_array._key_to_indexers(key) indexers = [] for dim, label in iteritems(label_indexers): - index = self.data_array.coordinates[dim] + index = self.data_array.coords[dim] indexers.append(indexing.convert_label_indexer(index, label)) return tuple(indexers) @@ -95,17 +95,17 @@ class DataArrayCoordinates(AbstractCoordinates): objects, but it also supports list-like indexing with integers. """ def __getitem__(self, key): - if key in self._data.dimensions: + if key in self._data.dims: return self._data.dataset.variables[key] elif isinstance(key, (int, np.integer)): - dimension = self._data.dimensions[key] - return self._data.dataset.variables[dimension] + dim = self._data.dims[key] + return self._data.dataset.variables[dim] else: raise KeyError(repr(key)) def __setitem__(self, key, value): if isinstance(key, (int, np.integer)): - key = self._data.dimensions[key] + key = self._data.dims[key] if key not in self: raise IndexError('%s is not a coordinate') @@ -145,15 +145,15 @@ class DataArray(AbstractArray): Attributes ---------- - dimensions : tuple + dims : tuple Dimension names associated with this array. values : np.ndarray Access or modify DataArray values as a numpy array. - coordinates : OrderedDict + coords : dict-like Dictionary of Coordinate objects that label values along each dimension. 
""" - def __init__(self, data=None, coordinates=None, dimensions=None, name=None, - attributes=None, encoding=None): + def __init__(self, data=None, coords=None, dims=None, name=None, + attrs=None, encoding=None): """ Parameters ---------- @@ -163,19 +163,19 @@ def __init__(self, data=None, coordinates=None, dimensions=None, name=None, object, attempst are made to use this array's metadata to fill in other unspecified arguments. This argument is required unless the 'dataset' argument is provided. - coordinates : sequence or dict of array_like objects, optional + coords : sequence or dict of array_like objects, optional Coordinates (tick labels) to use for indexing along each dimension. If dict-like, should be a mapping from dimension names to the corresponding coordinates. - dimensions : str or sequence of str, optional + dims : str or sequence of str, optional Name(s) of the the data dimension(s). Must be either a string (only for 1D data) or a sequence of strings with length equal to the number of dimensions. If this argument is omited, dimension names - are taken from coordinates (if possible) and otherwise default to + are taken from ``coords`` (if possible) and otherwise default to ``['dim_0', ... 'dim_n']``. name : str or None, optional Name of this array. - attributes : dict_like or None, optional + attrs : dict_like or None, optional Attributes to assign to the new variable. By default, an empty attribute dictionary is initialized. encoding : dict_like or None, optional @@ -186,31 +186,29 @@ def __init__(self, data=None, coordinates=None, dimensions=None, name=None, Unrecognized keys are ignored. 
""" # try to fill in arguments from data if they weren't supplied - if coordinates is None: - coordinates = getattr(data, 'coordinates', None) + if coords is None: + coords = getattr(data, 'coords', None) if isinstance(data, pd.Series): - coordinates = [data.index] + coords = [data.index] elif isinstance(data, pd.DataFrame): - coordinates = [data.index, data.columns] + coords = [data.index, data.columns] elif isinstance(data, (pd.Index, variable.Coordinate)): - coordinates = [data] + coords = [data] elif isinstance(data, pd.Panel): - coordinates = [data.items, data.major_axis, data.minor_axis] - if dimensions is None: - dimensions = getattr(data, 'dimensions', None) + coords = [data.items, data.major_axis, data.minor_axis] + if dims is None: + dims = getattr(data, 'dims', None) if name is None: name = getattr(data, 'name', None) - if attributes is None: - attributes = getattr(data, 'attrs', None) + if attrs is None: + attrs = getattr(data, 'attrs', None) if encoding is None: encoding = getattr(data, 'encoding', None) data = variable._as_compatible_data(data) - coordinates, dimensions = _infer_coordinates_and_dimensions( - data.shape, coordinates, dimensions) - variables = OrderedDict((var.name, var) for var in coordinates) - variables[name] = variable.Variable( - dimensions, data, attributes, encoding) + coords, dims = _infer_coords_and_dims(data.shape, coords, dims) + variables = OrderedDict((var.name, var) for var in coords) + variables[name] = variable.Variable(dims, data, attrs, encoding) dataset = xray.Dataset(variables) self._dataset = dataset @@ -296,23 +294,28 @@ def as_index(self): return self.variable.to_coord().as_index @property - def dimensions(self): - return self.variable.dimensions + def dims(self): + return self.variable.dims - @dimensions.setter - def dimensions(self, value): + @dims.setter + def dims(self, value): with self._set_new_dataset() as ds: if not len(value) == self.ndim: raise ValueError('%s dimensions supplied but data has ndim=%s' % 
(len(value), self.ndim)) - name_map = dict(zip(self.dimensions, value)) + name_map = dict(zip(self.dims, value)) ds.rename(name_map, inplace=True) if self.name in name_map: self._name = name_map[self.name] + @property + def dimensions(self): + utils.alias_warning('dimensions', 'dims') + return self.dims + def _key_to_indexers(self, key): return OrderedDict( - zip(self.dimensions, indexing.expanded_indexer(key, self.ndim))) + zip(self.dims, indexing.expanded_indexer(key, self.ndim))) def __getitem__(self, key): if isinstance(key, basestring): @@ -344,12 +347,12 @@ def loc(self): @property def attributes(self): - utils.alias_warning('attributes', 'attrs', 3) + utils.alias_warning('attributes', 'attrs') return self.variable.attrs @attributes.setter def attributes(self, value): - utils.alias_warning('attributes', 'attrs', 3) + utils.alias_warning('attributes', 'attrs') self.variable.attrs = value @property @@ -373,11 +376,10 @@ def encoding(self, value): @property def indexes(self): - utils.alias_warning('indexes', 'coordinates', 3) - return self.coordinates + return self.coords @property - def coordinates(self): + def coords(self): """Dictionary-like container of xray.Coordinate objects used for label based indexing. @@ -386,8 +388,8 @@ def coordinates(self): """ return DataArrayCoordinates(self) - @coordinates.setter - def coordinates(self, value): + @coords.setter + def coords(self, value): if not len(value) == self.ndim: raise ValueError('%s coordinates supplied but data has ndim=%s' % (len(value), self.ndim)) @@ -396,21 +398,26 @@ def coordinates(self, value): # DataArrayCoordinates? 
if isinstance(value, DataArrayCoordinates): # yes, this is regretably complex and probably slow - name_map = dict(zip(self.dimensions, value.keys())) + name_map = dict(zip(self.dims, value.keys())) ds.rename(name_map, inplace=True) name = name_map.get(self.name, self.name) - dimensions = ds[name].dimensions + dims = ds[name].dims value = value.values() else: name = self.name - dimensions = self.dimensions + dims = self.dims - for k, v in zip(dimensions, value): + for k, v in zip(dims, value): coord = DataArrayCoordinates._convert_to_coord( - k, v, expected_size=ds.coordinates[k].size) + k, v, expected_size=ds.coords[k].size) ds[k] = coord self._name = name + @property + def coordinates(self): + utils.alias_warning('coordinates', 'coords') + return self.coords + def load_data(self): """Manually trigger loading of this array's data from disk or a remote source into memory and return this array. @@ -421,7 +428,7 @@ def load_data(self): working with many file objects on disk. """ self.variable.load_data() - for coord in self.coordinates.values(): + for coord in self.coords.values(): coord.load_data() return self @@ -502,11 +509,11 @@ def reindex_like(self, other, copy=True): DataArray.reindex align """ - return self.reindex(copy=copy, **other.coordinates) + return self.reindex(copy=copy, **other.coords) - def reindex(self, copy=True, **coordinates): - """Conform this object onto a new set of coordinates or pandas.Index - objects, filling in missing values with NaN. + def reindex(self, copy=True, **indexers): + """Conform this object onto a new set of coordinates, filling in + missing values with NaN. Parameters ---------- @@ -514,7 +521,7 @@ def reindex(self, copy=True, **coordinates): If `copy=True`, the returned array's dataset contains only copied variables. If `copy=False` and no reindexing is required then original variables from this array's dataset are returned. 
- **coordinates : dict + **indexers : dict Dictionary with keys given by dimension names and values given by arrays of coordinates tick labels. Any mis-matched coordinate values will be filled in with NaN, and any mis-matched dimension names will @@ -532,7 +539,7 @@ def reindex(self, copy=True, **coordinates): align """ ds = self.select_vars().dataset - reindexed_ds = ds.reindex(copy=copy, **coordinates) + reindexed_ds = ds.reindex(copy=copy, **indexers) return reindexed_ds[self.name] def rename(self, new_name_or_name_dict): @@ -580,7 +587,7 @@ def drop_vars(self, *names): raise ValueError('cannot drop the name of a DataArray with ' 'drop_vars. Use the `drop_vars` method of ' 'the dataset instead.') - if any(name in self.dimensions for name in names): + if any(name in self.dims for name in names): raise ValueError('cannot drop a coordinate variable from a ' 'DataArray. Use the `drop_vars` method of ' 'the dataset instead.') @@ -615,7 +622,7 @@ def groupby(self, group, squeeze=True): group = self.dataset[group] return groupby.ArrayGroupBy(self, group, squeeze=squeeze) - def transpose(self, *dimensions): + def transpose(self, *dims): """Return a new DataArray object with transposed dimensions. Note: Although this operation returns a view of this array's data, it @@ -623,7 +630,7 @@ def transpose(self, *dimensions): Parameters ---------- - *dimensions : str, optional + *dims : str, optional By default, reverse the dimensions. Otherwise, reorder the dimensions to this order. @@ -643,15 +650,15 @@ def transpose(self, *dimensions): Array.transpose """ ds = self.dataset.copy() - ds[self.name] = self.variable.transpose(*dimensions) + ds[self.name] = self.variable.transpose(*dims) return ds[self.name] - def squeeze(self, dimension=None): + def squeeze(self, dim=None): """Return a new DataArray object with squeezed data. 
Parameters ---------- - dimensions : None or str or tuple of str, optional + dim : None or str or tuple of str, optional Selects a subset of the length one dimensions. If a dimension is selected with length greater than one, an error is raised. If None, all length one dimensions are squeezed. @@ -671,11 +678,10 @@ def squeeze(self, dimension=None): -------- numpy.squeeze """ - ds = self.dataset.squeeze(dimension) + ds = self.dataset.squeeze(dim) return ds[self.name] - def reduce(self, func, dimension=None, axis=None, keep_attrs=False, - **kwargs): + def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs): """Reduce this array by applying `func` along some dimension(s). Parameters @@ -684,11 +690,11 @@ def reduce(self, func, dimension=None, axis=None, keep_attrs=False, Function which can be called in the form `f(x, axis=axis, **kwargs)` to return the result of reducing an np.ndarray over an integer valued axis. - dimension : str or sequence of str, optional + dim : str or sequence of str, optional Dimension(s) over which to apply `func`. axis : int or sequence of int, optional Axis(es) over which to repeatedly apply `func`. Only one of the - 'dimension' and 'axis' arguments can be supplied. If neither are + 'dim' and 'axis' arguments can be supplied. If neither are supplied, then the reduction is calculated over the flattened array (by calling `f(x)` without an axis argument). keep_attrs : bool, optional @@ -704,13 +710,17 @@ def reduce(self, func, dimension=None, axis=None, keep_attrs=False, DataArray with this object's array replaced with an array with summarized data and the indicated dimension(s) removed. 
""" - var = self.variable.reduce(func, dimension, axis, keep_attrs, **kwargs) - drop = set(self.dimensions) - set(var.dimensions) + if 'dimension' in kwargs and dim is None: + dim = kwargs.pop('dimension') + utils.alias_warning('dimension', 'dim') + + var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs) + drop = set(self.dims) - set(var.dims) # For now, take an aggressive strategy of removing all variables # associated with any dropped dimensions # TODO: save some summary (mean? bounds?) of dropped variables drop |= set(k for k, v in iteritems(self.dataset.variables) - if any(dim in drop for dim in v.dimensions)) + if any(dim in drop for dim in v.dims)) ds = self.dataset.drop_vars(*drop) ds[self.name] = var @@ -720,7 +730,7 @@ def reduce(self, func, dimension=None, axis=None, keep_attrs=False, return ds[self.name] @classmethod - def concat(cls, arrays, dimension='concat_dimension', indexers=None, + def concat(cls, arrays, dim='concat_dim', indexers=None, concat_over=None): """Stack arrays along a new or existing dimension to form a new DataArray. @@ -731,7 +741,7 @@ def concat(cls, arrays, dimension='concat_dimension', indexers=None, Arrays to stack together. Each variable is expected to have matching dimensions and shape except for along the concatenated dimension. - dimension : str or Array, optional + dim : str or Array, optional Name of the dimension to stack along. 
This can either be a new dimension name, in which case it is added along axis=0, or an existing dimension name, in which case the location of the @@ -775,8 +785,8 @@ def concat(cls, arrays, dimension='concat_dimension', indexers=None, elif isinstance(concat_over, basestring): concat_over = set([concat_over]) concat_over = set(concat_over) | set([name]) - ds = xray.Dataset.concat(datasets, dimension, indexers, - concat_over=concat_over) + ds = xray.Dataset.concat(datasets, dim, indexers, + concat_over=concat_over) return ds[name] def to_dataframe(self): @@ -795,8 +805,8 @@ def to_series(self): Unlike `to_dataframe`, only this array is including in the returned series; the other non-coordinate variables in the dataset are not. """ - index = multi_index_from_product(self.coordinates.values(), - names=self.coordinates.keys()) + index = multi_index_from_product(self.coords.values(), + names=self.coords.keys()) return pd.Series(self.values.reshape(-1), index=index, name=self.name) @classmethod @@ -825,8 +835,8 @@ def equals(self, other): try: return (all(k1 == k2 and v1.equals(v2) for (k1, v1), (k2, v2) - in zip(self.coordinates.items(), - other.coordinates.items())) + in zip(self.coords.items(), + other.coords.items())) and self.variable.equals(other.variable)) except AttributeError: return False @@ -839,19 +849,19 @@ def identical(self, other): return (self.name == other.name and all(k1 == k2 and v1.identical(v2) for (k1, v1), (k2, v2) - in zip(self.coordinates.items(), - other.coordinates.items())) + in zip(self.coords.items(), + other.coords.items())) and self.variable.identical(other.variable)) except AttributeError: return False def _select_coords(self): - return xray.Dataset(self.coordinates) + return xray.Dataset(self.coords) def __array_wrap__(self, obj, context=None): new_var = self.variable.__array_wrap__(obj, context) ds = self._select_coords() - if (self.name,) == self.dimensions: + if (self.name,) == self.dims: # use a new name for coordinate variables name 
= None else: @@ -868,10 +878,10 @@ def func(self, *args, **kwargs): def _check_coords_compat(self, other): # TODO: possibly automatically select index intersection instead? - if hasattr(other, 'coordinates'): - for k, v in iteritems(self.coordinates): - if (k in other.coordinates - and not v.equals(other.coordinates[k])): + if hasattr(other, 'coords'): + for k, v in iteritems(self.coords): + if (k in other.coords + and not v.equals(other.coords[k])): raise ValueError('coordinate %r is not aligned' % k) @staticmethod @@ -882,10 +892,10 @@ def func(self, other): # for broadcasting dimensions like 'dayofyear' against 'time' self._check_coords_compat(other) ds = self._select_coords() - if hasattr(other, 'coordinates'): - ds.merge(other.coordinates, inplace=True) + if hasattr(other, 'coords'): + ds.merge(other.coords, inplace=True) other_array = getattr(other, 'variable', other) - if hasattr(other, 'name') or (self.name,) == self.dimensions: + if hasattr(other, 'name') or (self.name,) == self.dims: name = None else: name = self.name @@ -902,8 +912,8 @@ def func(self, other): self._check_coords_compat(other) other_array = getattr(other, 'variable', other) f(self.variable, other_array) - if hasattr(other, 'coordinates'): - self.dataset.merge(other.coordinates, inplace=True) + if hasattr(other, 'coords'): + self.dataset.merge(other.coords, inplace=True) return self return func @@ -966,7 +976,7 @@ def align(*objects, **kwargs): all_indexes = defaultdict(list) for obj in objects: - for k, v in iteritems(obj.coordinates): + for k, v in iteritems(obj.coords): all_indexes[k].append(v.as_index) # Exclude dimensions with all equal indices to avoid unnecessary reindexing diff --git a/xray/dataset.py b/xray/dataset.py index 7066508ea1a..41da436cdc1 100644 --- a/xray/dataset.py +++ b/xray/dataset.py @@ -137,7 +137,7 @@ def __missing__(self, key): data = (month // 3) % 4 + 1 else: data = getattr(date, suffix) - return variable.Variable(ref_var.dimensions, data) + return 
variable.Variable(ref_var.dims, data) def _as_dataset_variable(name, var): @@ -148,8 +148,8 @@ def _as_dataset_variable(name, var): except TypeError: raise TypeError('Dataset variables must be of type ' 'DataArray or Variable, or a sequence of the ' - 'form (dimensions, data[, attributes, encoding])') - if name in var.dimensions: + 'form (dims, data[, attrs, encoding])') + if name in var.dims: # convert the into an Index if var.ndim != 1: raise ValueError('an index variable must be defined with ' @@ -164,7 +164,7 @@ def _expand_variables(raw_variables, old_variables={}, compat='identical'): Returns a dictionary of Variable objects suitable for inserting into a Dataset._variables dictionary. - This includes converting tuples (dimensions, data) into Variable objects, + This includes converting tuples (dims, data) into Variable objects, converting coordinate variables into Coordinate objects and expanding DataArray objects into Variables plus coordinates. @@ -185,33 +185,33 @@ def add_variable(name, var): for name, var in iteritems(raw_variables): if hasattr(var, 'dataset'): # it's a DataArray - for dim, coord in iteritems(var.coordinates): + for dim, coord in iteritems(var.coords): if dim != name: add_variable(dim, coord) add_variable(name, var) return new_variables -def _calculate_dimensions(variables): +def _calculate_dims(variables): """Calculate the dimensions corresponding to a set of variables. Returns dictionary mapping from dimension names to sizes. Raises ValueError if any of the dimension sizes conflict. 
""" - dimensions = SortedKeysDict() - scalar_vars = set(k for k, v in iteritems(variables) if not v.dimensions) + dims = SortedKeysDict() + scalar_vars = set(k for k, v in iteritems(variables) if not v.dims) for k, var in iteritems(variables): - for dim, size in zip(var.dimensions, var.shape): + for dim, size in zip(var.dims, var.shape): if dim in scalar_vars: raise ValueError('dimension %s already exists as a scalar ' 'variable' % dim) - if dim not in dimensions: - dimensions[dim] = size - elif dimensions[dim] != size: + if dim not in dims: + dims[dim] = size + elif dims[dim] != size: raise ValueError('dimension %r on variable %r has length ' '%s but already exists with length %s' % - (dim, k, size, dimensions[dim])) - return dimensions + (dim, k, size, dims[dim])) + return dims def _get_dataset_vars_and_attrs(obj): @@ -252,7 +252,7 @@ class DatasetCoordinates(common.AbstractCoordinates): lookups. """ def __getitem__(self, key): - if key in self._data.dimensions: + if key in self._data.dims: return self._data.variables[key] elif isinstance(key, (int, np.integer)): raise KeyError('%r: Dataset coordinates do not support integer ' @@ -289,7 +289,7 @@ class Dataset(Mapping, common.ImplementsDatasetReduce): coordinates, which means they are saved in the dataset as `xray.Coordinate` objects. """ - def __init__(self, variables=None, attributes=None): + def __init__(self, variables=None, attrs=None): """To load data from a file or file-like object, use the `open_dataset` function. @@ -297,26 +297,26 @@ def __init__(self, variables=None, attributes=None): ---------- variables : dict-like, optional A mapping from variable names to `DataArray` objets, `Variable` - objects or sequences of the form `(dimensions, data[, attributes])` + objects or sequences of the form `(dims, data[, attrs])` which can be used as arguments to create a new `Variable`. Each dimension must have the same length in all variables in which it appears. 
- attributes : dict-like, optional + attrs : dict-like, optional Global attributes to save on this dataset. """ self._variables = VariablesDict() - self._dimensions = SortedKeysDict() - self._attributes = OrderedDict() + self._dims = SortedKeysDict() + self._attrs = OrderedDict() self._file_obj = None if variables is not None: self._set_init_vars_and_dims(variables) - if attributes is not None: - self._attributes.update(attributes) + if attrs is not None: + self._attrs.update(attrs) - def _add_missing_coordinates(self): + def _add_missing_coords(self): """Add missing coordinate variables IN-PLACE to the variables dict """ - for dim, size in iteritems(self._dimensions): + for dim, size in iteritems(self._dims): if dim not in self._variables: coord = variable.Coordinate(dim, np.arange(size)) self._variables[dim] = coord @@ -326,7 +326,7 @@ def _update_vars_and_dims(self, new_variables, needs_copy=True): Raises a ValueError if any dimensions have conflicting lengths in the new dataset. Otherwise will update this dataset's _variables and - _dimensions attributes in-place. + _dims attributes in-place. Set `needs_copy=False` only if this dataset is brand-new and hence can be thrown away if this method fails. 
@@ -335,11 +335,11 @@ def _update_vars_and_dims(self, new_variables, needs_copy=True): # up with inconsistent dimensions variables = self._variables.copy() if needs_copy else self._variables variables.update(new_variables) - dimensions = _calculate_dimensions(variables) + dims = _calculate_dims(variables) # all checks are complete: it's safe to update self._variables = variables - self._dimensions = dimensions - self._add_missing_coordinates() + self._dims = dims + self._add_missing_coords() def _set_init_vars_and_dims(self, variables): """Set the initial value of Dataset variables and dimensions @@ -403,31 +403,36 @@ def variables(self): @property def attributes(self): utils.alias_warning('attributes', 'attrs', 3) - return self._attributes + return self._attrs @attributes.setter def attributes(self, value): utils.alias_warning('attributes', 'attrs', 3) - self._attributes = OrderedDict(value) + self._attrs = OrderedDict(value) @property def attrs(self): """Dictionary of global attributes on this dataset """ - return self._attributes + return self._attrs @attrs.setter def attrs(self, value): - self._attributes = OrderedDict(value) + self._attrs = OrderedDict(value) @property - def dimensions(self): + def dims(self): """Mapping from dimension names to lengths. This dictionary cannot be modified directly, but is updated when adding new variables. 
""" - return Frozen(self._dimensions) + return Frozen(self._dims) + + @property + def dimensions(self): + utils.alias_warning('dimensions', 'dims') + return self.dims def load_data(self): """Manually trigger loading of this dataset's data from disk or a @@ -457,8 +462,8 @@ def copy(self, deep=False): # skip __init__ to avoid costly validation obj = self.__new__(type(self)) obj._variables = variables - obj._dimensions = self._dimensions.copy() - obj._attributes = self._attributes.copy() + obj._dims = self._dims.copy() + obj._attrs = self._attrs.copy() obj._file_obj = None return obj @@ -516,7 +521,7 @@ def __setitem__(self, key, value): dataset. If value is an `Variable` object (or tuple of form - `(dimensions, data[, attributes])`), add it to this dataset as a new + ``(dims, data[, attrs])``), add it to this dataset as a new variable. """ self.merge({key: value}, inplace=True, overwrite_vars=[key]) @@ -527,11 +532,11 @@ def __delitem__(self, key): If this variable is a dimension, all variables containing this dimension are also removed. """ - if key in self._dimensions: - del self._dimensions[key] + if key in self._dims: + del self._dims[key] del self._variables[key] also_delete = [k for k, v in iteritems(self._variables) - if key in v.dimensions] + if key in v.dims] for key in also_delete: del self._variables[key] @@ -581,26 +586,25 @@ def identical(self, other): @property def indexes(self): - utils.alias_warning('indexes', 'coordinates', 3) - return self.coordinates + return self.coords @property - def coordinates(self): + def coords(self): """Dictionary of xray.Coordinate objects used for label based indexing. 
""" return DatasetCoordinates(self) @property - def noncoordinates(self): - utils.alias_warning('noncoordinates', 'noncoordinates', 3) - return self.noncoordinates + def coordinates(self): + utils.alias_warning('coordinates', 'coords', 3) + return self.coords @property def noncoordinates(self): """Dictionary of DataArrays whose names do not match dimensions. """ return FrozenOrderedDict((name, self[name]) for name in self - if name not in self.dimensions) + if name not in self.dims) def dump_to_store(self, store): """Store dataset contents to a backends.*DataStore object.""" @@ -658,7 +662,7 @@ def isel(self, **indexers): DataArray.isel DataArray.sel """ - invalid = [k for k in indexers if not k in self.dimensions] + invalid = [k for k in indexers if not k in self.dims] if invalid: raise ValueError("dimensions %r do not exist" % invalid) @@ -668,7 +672,7 @@ def isel(self, **indexers): variables = OrderedDict() for name, var in iteritems(self.variables): - var_indexers = dict((k, v) for k, v in iteritems(indexers) if k in var.dimensions) + var_indexers = dict((k, v) for k, v in iteritems(indexers) if k in var.dims) variables[name] = var.isel(**var_indexers) return type(self)(variables, self.attrs) @@ -745,11 +749,11 @@ def reindex_like(self, other, copy=True): Dataset.reindex align """ - return self.reindex(copy=copy, **other.coordinates) + return self.reindex(copy=copy, **other.coords) - def reindex(self, copy=True, **coordinates): - """Conform this object onto a new set of coordinates or pandas.Index - objects, filling in missing values with NaN. + def reindex(self, copy=True, **indexers): + """Conform this object onto a new set of coordinates, filling in + missing values with NaN. Parameters ---------- @@ -757,7 +761,7 @@ def reindex(self, copy=True, **coordinates): If `copy=True`, the returned dataset contains only copied variables. If `copy=False` and no reindexing is required then original variables from this dataset are returned. 
- **coordinates : dict + **indexers : dict Dictionary with keys given by dimension names and values given by arrays of coordinates tick labels. Any mis-matched coordinate values will be filled in with NaN, and any mis-matched dimension names will @@ -773,16 +777,16 @@ def reindex(self, copy=True, **coordinates): Dataset.reindex_like align """ - if not coordinates: + if not indexers: # shortcut return self.copy(deep=True) if copy else self # build up indexers for assignment along each index to_indexers = {} from_indexers = {} - for name, coord in iteritems(self.coordinates): - if name in coordinates: - target = utils.safe_cast_to_index(coordinates[name]) + for name, coord in iteritems(self.coords): + if name in indexers: + target = utils.safe_cast_to_index(indexers[name]) indexer = coord.get_indexer(target) # Note pandas uses negative values from get_indexer to signify @@ -809,7 +813,7 @@ def any_not_full_slices(indexers): return any(not is_full_slice(idx) for idx in indexers) def var_indexers(var, indexers): - return tuple(indexers.get(d, slice(None)) for d in var.dimensions) + return tuple(indexers.get(d, slice(None)) for d in var.dims) def get_fill_value_and_dtype(dtype): # N.B. 
these casting rules should match pandas @@ -827,11 +831,11 @@ def get_fill_value_and_dtype(dtype): # create variables for the new dataset variables = OrderedDict() for name, var in iteritems(self.variables): - if name in coordinates: - new_var = coordinates[name] - if not (hasattr(new_var, 'dimensions') and + if name in indexers: + new_var = indexers[name] + if not (hasattr(new_var, 'dims') and hasattr(new_var, 'values')): - new_var = variable.Coordinate(var.dimensions, new_var, + new_var = variable.Coordinate(var.dims, new_var, var.attrs, var.encoding) elif copy: new_var = variable.as_variable(new_var).copy() @@ -848,7 +852,7 @@ def get_fill_value_and_dtype(dtype): data[:] = fill_value # create a new Variable so we can use orthogonal indexing new_var = variable.Variable( - var.dimensions, data, var.attrs) + var.dims, data, var.attrs) new_var[assign_to] = var[assign_from].values elif any_not_full_slices(assign_from): # type coercion is not necessary as there are no missing @@ -886,13 +890,13 @@ def rename(self, name_dict, inplace=False): variables = VariablesDict() for k, v in iteritems(self.variables): name = name_dict.get(k, k) - dims = tuple(name_dict.get(dim, dim) for dim in v.dimensions) + dims = tuple(name_dict.get(dim, dim) for dim in v.dims) var = v.copy(deep=False) - var.dimensions = dims + var.dims = dims variables[name] = var if inplace: - self._dimensions = _calculate_dimensions(variables) + self._dims = _calculate_dims(variables) self._variables = variables obj = self else: @@ -1030,7 +1034,7 @@ def drop_vars(self, *names): self._assert_all_in_dataset(names) drop = set(names) drop |= set(k for k, v in iteritems(self.variables) - if any(name in v.dimensions for name in names)) + if any(name in v.dims for name in names)) variables = OrderedDict((k, v) for k, v in iteritems(self.variables) if k not in drop) return type(self)(variables, self.attrs) @@ -1060,12 +1064,12 @@ def groupby(self, group, squeeze=True): group = self[group] return 
groupby.DatasetGroupBy(self, group, squeeze=squeeze) - def squeeze(self, dimension=None): + def squeeze(self, dim=None): """Return a new dataset with squeezed data. Parameters ---------- - dimension : None or str or tuple of str, optional + dim : None or str or tuple of str, optional Selects a subset of the length one dimensions. If a dimension is selected with length greater than one, an error is raised. If None, all length one dimensions are squeezed. @@ -1085,9 +1089,9 @@ def squeeze(self, dimension=None): -------- numpy.squeeze """ - return utils.squeeze(self, self.dimensions, dimension) + return utils.squeeze(self, self.dims, dim) - def reduce(self, func, dimension=None, keep_attrs=False, **kwargs): + def reduce(self, func, dim=None, keep_attrs=False, **kwargs): """Reduce this dataset by applying `func` along some dimension(s). Parameters @@ -1096,7 +1100,7 @@ def reduce(self, func, dimension=None, keep_attrs=False, **kwargs): Function which can be called in the form `f(x, axis=axis, **kwargs)` to return the result of reducing an np.ndarray over an integer valued axis. - dimension : str or sequence of str, optional + dim : str or sequence of str, optional Dimension(s) over which to apply `func`. By default `func` is applied over all dimensions. keep_attrs : bool, optional @@ -1112,31 +1116,34 @@ def reduce(self, func, dimension=None, keep_attrs=False, **kwargs): Dataset with this object's DataArrays replaced with new DataArrays of summarized data and the indicated dimension(s) removed. 
""" - - if isinstance(dimension, basestring): - dims = set([dimension]) - elif dimension is None: - dims = set(self.coordinates) + if 'dimension' in kwargs and dim is None: + dim = kwargs.pop('dimension') + utils.alias_warning('dimension', 'dim') + + if isinstance(dim, basestring): + dims = set([dim]) + elif dim is None: + dims = set(self.dims) else: - dims = set(dimension) + dims = set(dim) - bad_dims = [dim for dim in dims if dim not in self.coordinates] + bad_dims = [dim for dim in dims if dim not in self.dims] if bad_dims: raise ValueError('Dataset does not contain the dimensions: ' '{0}'.format(bad_dims)) variables = OrderedDict() for name, var in iteritems(self.variables): - reduce_dims = [dim for dim in var.dimensions if dim in dims] + reduce_dims = [dim for dim in var.dims if dim in dims] if reduce_dims: - if name not in self.dimensions: + if name not in self.dims: if len(reduce_dims) == 1: # unpack dimensions for the benefit of functions like # np.argmin which can't handle tuple arguments reduce_dims, = reduce_dims try: variables[name] = var.reduce(func, - dimension=reduce_dims, + dim=reduce_dims, **kwargs) except TypeError: # array (e.g., string) does not support this reduction, @@ -1150,7 +1157,7 @@ def reduce(self, func, dimension=None, keep_attrs=False, **kwargs): attrs = self.attrs if keep_attrs else {} - return Dataset(variables=variables, attributes=attrs) + return Dataset(variables, attrs) def apply(self, func, to=None, keep_attrs=False, **kwargs): """Apply a function over noncoordinates in this dataset. @@ -1197,7 +1204,7 @@ def apply(self, func, to=None, keep_attrs=False, **kwargs): return Dataset(variables, attrs) @classmethod - def concat(cls, datasets, dimension='concat_dimension', indexers=None, + def concat(cls, datasets, dim='concat_dim', indexers=None, mode='different', concat_over=None, compat='equals'): """Concatenate datasets along a new or existing dimension. 
@@ -1208,7 +1215,7 @@ def concat(cls, datasets, dimension='concat_dimension', indexers=None, matching attributes, and all variables except those along the stacked dimension (those that contain "dimension" as a dimension or are listed in "concat_over") are expected to be equal. - dimension : str or DataArray, optional + dim : str or DataArray, optional Name of the dimension to stack along. If dimension is provided as an DataArray, the name of the DataArray is used as the stacking dimension and the array is added to the returned dataset. @@ -1251,7 +1258,7 @@ def concat(cls, datasets, dimension='concat_dimension', indexers=None, datasets = list(map(as_dataset, datasets)) if not datasets: raise ValueError('must supply at least one dataset to concatenate') - dim_name = getattr(dimension, 'name', dimension) + dim_name = getattr(dim, 'name', dim) # figure out variables to concatenate over if concat_over is None: @@ -1275,7 +1282,7 @@ def differs(vname, v): # concatenate all noncoordinates concat_over.update(set(datasets[0].noncoordinates.keys())) elif mode == 'minimal': - # only concatenate variables in which 'dimension' already + # only concatenate variables in which 'dim' already # appears. These variables are added later. 
pass else: @@ -1288,10 +1295,10 @@ def differs(vname, v): # automatically concatenate over variables along the dimension auto_concat_dims = set([dim_name]) - if hasattr(dimension, 'dimensions'): - auto_concat_dims |= set(dimension.dimensions) + if hasattr(dim, 'dims'): + auto_concat_dims |= set(dim.dims) for k, v in iteritems(datasets[0]): - if k == dim_name or auto_concat_dims.intersection(v.dimensions): + if k == dim_name or auto_concat_dims.intersection(v.dims): concat_over.add(k) # create the new dataset and add constant variables @@ -1318,11 +1325,11 @@ def differs(vname, v): # stack up each variable to fill-out the dataset for k in concat_over: concatenated[k] = variable.Variable.concat( - [ds[k] for ds in datasets], dimension, indexers) + [ds[k] for ds in datasets], dim, indexers) - if not isinstance(dimension, basestring): + if not isinstance(dim, basestring): # add dimension last to ensure that its in the final Dataset - concatenated[dim_name] = dimension + concatenated[dim_name] = dim return concatenated @@ -1338,17 +1345,17 @@ def to_dataframe(self): # we need a template to broadcast all dataset variables against # using stride_tricks lets us make the ndarray for broadcasting without # having to allocate memory - shape = tuple(self.dimensions.values()) + shape = tuple(self.dims.values()) empty_data = np.lib.stride_tricks.as_strided(np.array(0), shape=shape, strides=[0] * len(shape)) - template = variable.Variable(self.dimensions.keys(), empty_data) + template = variable.Variable(self.dims.keys(), empty_data) for k in columns: _, var = variable.broadcast_variables(template, self.variables[k]) _, var_data = np.broadcast_arrays(template.values, var.values) data.append(var_data.reshape(-1)) - index = multi_index_from_product(list(self.coordinates.values()), - names=list(self.coordinates.keys())) + index = multi_index_from_product(list(self.coords.values()), + names=list(self.coords.keys())) return pd.DataFrame(OrderedDict(zip(columns, data)), index=index) 
@classmethod @@ -1375,19 +1382,19 @@ def from_dataframe(cls, dataframe): # expand the DataFrame to include the product of all levels full_idx = multi_index_from_product(idx.levels, idx.names) dataframe = dataframe.reindex(full_idx) - dimensions = [name if name is not None else 'level_%i' % n - for n, name in enumerate(idx.names)] - for dim, lev in zip(dimensions, idx.levels): + dims = [name if name is not None else 'level_%i' % n + for n, name in enumerate(idx.names)] + for dim, lev in zip(dims, idx.levels): obj[dim] = (dim, lev) shape = [lev.size for lev in idx.levels] else: - dimensions = (idx.name if idx.name is not None else 'index',) - obj[dimensions[0]] = (dimensions, idx) + dims = (idx.name if idx.name is not None else 'index',) + obj[dims[0]] = (dims, idx) shape = -1 for name, series in iteritems(dataframe): data = series.values.reshape(shape) - obj[name] = (dimensions, data) + obj[name] = (dims, data) return obj ops.inject_reduce_methods(Dataset) diff --git a/xray/groupby.py b/xray/groupby.py index fcf254160c9..9c460759642 100644 --- a/xray/groupby.py +++ b/xray/groupby.py @@ -78,23 +78,23 @@ def __init__(self, obj, group, squeeze=True): raise ValueError('`group` must be 1 dimensional') if getattr(group, 'name', None) is None: raise ValueError('`group` must have a name') - if not hasattr(group, 'dimensions'): - raise ValueError("`group` must have a 'dimensions' attribute") + if not hasattr(group, 'dims'): + raise ValueError("`group` must have a 'dims' attribute") self.obj = obj self.group = group - self.group_dim, = group.dimensions + self.group_dim, = group.dims from .dataset import as_dataset - expected_size = as_dataset(obj).dimensions[self.group_dim] + expected_size = as_dataset(obj).dims[self.group_dim] if group.size != expected_size: raise ValueError('the group variable\'s length does not ' 'match the length of this variable along its ' 'dimension') - if group.name in obj.dimensions: + if group.name in obj.dims: # assume that group already has 
sorted, unique values - if group.dimensions != (group.name,): + if group.dims != (group.name,): raise ValueError('`group` is required to be a coordinate if ' '`group.name` is a dimension in `obj`') group_indices = np.arange(group.size) @@ -132,7 +132,7 @@ def _iter_grouped(self): yield self.obj.isel(**{self.group_dim: indices}) def _infer_concat_args(self, applied_example): - if self.group_dim in applied_example.dimensions: + if self.group_dim in applied_example.dims: concat_dim = self.group indexers = self.group_indices else: @@ -158,9 +158,9 @@ def _iter_grouped_shortcut(self): # build the new dimensions if isinstance(self.group_indices[0], (int, np.integer)): # group_dim is squeezed out - dims = tuple(d for d in array.dimensions if d != self.group_dim) + dims = tuple(d for d in array.dims if d != self.group_dim) else: - dims = array.dimensions + dims = array.dims # slice the data and build the new Arrays directly indexer = [slice(None)] * array.ndim @@ -179,8 +179,8 @@ def _combine_shortcut(self, applied, concat_dim, indexers): ds = self.obj.dataset.drop_vars(name) ds[concat_dim.name] = concat_dim # remove extraneous dimensions - for dim in self.obj.dimensions: - if dim not in stacked.dimensions and dim in ds: + for dim in self.obj.dims: + if dim not in stacked.dims and dim in ds: del ds[dim] ds[name] = stacked return ds[name] @@ -188,14 +188,14 @@ def _combine_shortcut(self, applied, concat_dim, indexers): def _restore_dim_order(self, stacked, concat_dim): def lookup_order(dimension): if dimension == self.group.name: - dimension, = concat_dim.dimensions - if dimension in self.obj.dimensions: + dimension, = concat_dim.dims + if dimension in self.obj.dims: axis = self.obj.get_axis_num(dimension) else: axis = 1e6 # some arbitrarily high value return axis - new_order = sorted(stacked.dimensions, key=lookup_order) + new_order = sorted(stacked.dims, key=lookup_order) return stacked.transpose(*new_order) def apply(self, func, shortcut=False, **kwargs): diff --git 
a/xray/indexing.py b/xray/indexing.py index 172440efa44..d89e9c7d9c0 100644 --- a/xray/indexing.py +++ b/xray/indexing.py @@ -131,8 +131,7 @@ def remap_label_indexers(data_obj, indexers): """Given an xray data object and label based indexers, return a mapping of equivalent location based indexers. """ - return dict((dim, convert_label_indexer(data_obj.coordinates[dim], - label, dim)) + return dict((dim, convert_label_indexer(data_obj.coords[dim], label, dim)) for dim, label in iteritems(indexers)) diff --git a/xray/utils.py b/xray/utils.py index 67868182cef..e16a78f98e6 100644 --- a/xray/utils.py +++ b/xray/utils.py @@ -34,17 +34,17 @@ def __new__(cls, *args, **kwargs): return Wrapper -def squeeze(xray_obj, dimensions, dimension=None): - """Squeeze the dimensions of an xray object.""" - if dimension is None: - dimension = [d for d, s in iteritems(dimensions) if s == 1] +def squeeze(xray_obj, dims, dim=None): + """Squeeze the dims of an xray object.""" + if dim is None: + dim = [d for d, s in iteritems(dims) if s == 1] else: - if isinstance(dimension, basestring): - dimension = [dimension] - if any(dimensions[k] > 1 for k in dimension): + if isinstance(dim, basestring): + dim = [dim] + if any(dims[k] > 1 for k in dim): raise ValueError('cannot select a dimension to squeeze out ' 'which has length greater than one') - return xray_obj.isel(**dict((dim, 0) for dim in dimension)) + return xray_obj.isel(**dict((d, 0) for d in dim)) def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8): diff --git a/xray/variable.py b/xray/variable.py index 2d7b1226699..d9a3b8f1a79 100644 --- a/xray/variable.py +++ b/xray/variable.py @@ -22,7 +22,7 @@ def as_variable(obj, strict=True): - If the object is already an `Variable`, return it. - If the object is a `DataArray`, return it if `strict=False` or return its variable if `strict=True`. 
- - Otherwise, if the object has 'dimensions' and 'data' attributes, convert + - Otherwise, if the object has 'dims' and 'data' attributes, convert it into a new `Variable`. - If all else fails, attempt to convert the object into an `Variable` by unpacking it into the arguments for `Variable.__init__`. @@ -33,8 +33,8 @@ def as_variable(obj, strict=True): # extract the primary Variable from DataArrays obj = obj.variable if not isinstance(obj, (Variable, xray.DataArray)): - if hasattr(obj, 'dimensions') and hasattr(obj, 'values'): - obj = Variable(obj.dimensions, obj.values, + if hasattr(obj, 'dims') and hasattr(obj, 'values'): + obj = Variable(obj.dims, obj.values, getattr(obj, 'attributes', None), getattr(obj, 'encoding', None)) else: @@ -191,7 +191,7 @@ class Variable(AbstractArray): described outside the context of its parent Dataset (if you want such a fully described object, use a DataArray instead). """ - def __init__(self, dims, data, attributes=None, encoding=None): + def __init__(self, dims, data, attrs=None, encoding=None): """ Parameters ---------- @@ -201,7 +201,7 @@ def __init__(self, dims, data, attributes=None, encoding=None): number of dimensions. data : array_like Data array which supports numpy-like data access. - attributes : dict_like or None, optional + attrs : dict_like or None, optional Attributes to assign to the new variable. If None (default), an empty attribute dictionary is initialized. encoding : dict_like or None, optional @@ -212,10 +212,10 @@ def __init__(self, dims, data, attributes=None, encoding=None): unrecognized encoding items. 
""" self._data = _as_compatible_data(data) - self._dimensions = self._parse_dimensions(dims) - if attributes is None: - attributes = {} - self._attributes = OrderedDict(attributes) + self._dims = self._parse_dimensions(dims) + if attrs is None: + attrs = {} + self._attrs = OrderedDict(attrs) self._encoding = dict({} if encoding is None else encoding) @property @@ -280,7 +280,7 @@ def values(self, values): def to_coord(self): """Return this variable as an xray.Coordinate""" - return Coordinate(self.dimensions, self._data, self.attrs, + return Coordinate(self.dims, self._data, self.attrs, encoding=self.encoding) @property @@ -289,13 +289,18 @@ def as_index(self): # n.b. creating a new pandas.Index from an old pandas.Index is # basically free as pandas.Index objcets are immutable assert self.ndim == 1 - return pd.Index(self._data_cached().array, name=self.dimensions[0]) + return pd.Index(self._data_cached().array, name=self.dims[0]) @property - def dimensions(self): + def dims(self): """Tuple of dimension names with which this variable is associated. """ - return self._dimensions + return self._dims + + @property + def dimensions(self): + utils.alias_warning('dimensions', 'dims') + return self.dims def _parse_dimensions(self, dims): if isinstance(dims, basestring): @@ -307,9 +312,9 @@ def _parse_dimensions(self, dims): % (dims, self.ndim)) return dims - @dimensions.setter - def dimensions(self, value): - self._dimensions = self._parse_dimensions(value) + @dims.setter + def dims(self, value): + self._dims = self._parse_dimensions(value) def __getitem__(self, key): """Return a new Array object whose contents are consistent with @@ -329,15 +334,15 @@ def __getitem__(self, key): array `x.values` directly. 
""" key = indexing.expanded_indexer(key, self.ndim) - dimensions = [dim for k, dim in zip(key, self.dimensions) + dims = [dim for k, dim in zip(key, self.dims) if not isinstance(k, (int, np.integer))] values = self._data[key] # orthogonal indexing should ensure the dimensionality is consistent if hasattr(values, 'ndim'): - assert values.ndim == len(dimensions), (values.ndim, len(dimensions)) + assert values.ndim == len(dims), (values.ndim, len(dims)) else: - assert len(dimensions) == 0, len(dimensions) - return type(self)(dimensions, values, self.attrs) + assert len(dims) == 0, len(dims) + return type(self)(dims, values, self.attrs) def __setitem__(self, key, value): """__setitem__ is overloaded to access the underlying numpy values with @@ -361,11 +366,11 @@ def attributes(self, value): def attrs(self): """Dictionary of local attributes on this variable. """ - return self._attributes + return self._attrs @attrs.setter def attrs(self, value): - self._attributes = OrderedDict(value) + self._attrs = OrderedDict(value) @property def encoding(self): @@ -385,9 +390,9 @@ def copy(self, deep=True): """ data = self.values.copy() if deep else self._data # note: - # dimensions is already an immutable tuple + # dims is already an immutable tuple # attributes and encoding will be copied when the new Array is created - return type(self)(self.dimensions, data, self.attrs, self.encoding) + return type(self)(self.dims, data, self.attrs, self.encoding) def __copy__(self): return self.copy(deep=False) @@ -417,24 +422,24 @@ def isel(self, **indexers): unless numpy fancy indexing was triggered by using an array indexer, in which case the data will be a copy. 
""" - invalid = [k for k in indexers if not k in self.dimensions] + invalid = [k for k in indexers if not k in self.dims] if invalid: raise ValueError("dimensions %r do not exist" % invalid) key = [slice(None)] * self.ndim - for i, dim in enumerate(self.dimensions): + for i, dim in enumerate(self.dims): if dim in indexers: key[i] = indexers[dim] return self[tuple(key)] indexed = utils.function_alias(isel, 'indexed') - def transpose(self, *dimensions): + def transpose(self, *dims): """Return a new Variable object with transposed dimensions. Parameters ---------- - *dimensions : str, optional + *dims : str, optional By default, reverse the dimensions. Otherwise, reorder the dimensions to this order. @@ -453,18 +458,18 @@ def transpose(self, *dimensions): -------- numpy.transpose """ - if len(dimensions) == 0: - dimensions = self.dimensions[::-1] - axes = self.get_axis_num(dimensions) + if len(dims) == 0: + dims = self.dims[::-1] + axes = self.get_axis_num(dims) data = self.values.transpose(*axes) - return type(self)(dimensions, data, self.attrs, self.encoding) + return type(self)(dims, data, self.attrs, self.encoding) - def squeeze(self, dimension=None): + def squeeze(self, dim=None): """Return a new Variable object with squeezed data. Parameters ---------- - dimensions : None or str or tuple of str, optional + dim : None or str or tuple of str, optional Selects a subset of the length one dimensions. If a dimension is selected with length greater than one, an error is raised. If None, all length one dimensions are squeezed. 
@@ -484,10 +489,10 @@ def squeeze(self, dimension=None): -------- numpy.squeeze """ - dimensions = dict(zip(self.dimensions, self.shape)) - return utils.squeeze(self, dimensions, dimension) + dims = dict(zip(self.dims, self.shape)) + return utils.squeeze(self, dims, dim) - def reduce(self, func, dimension=None, axis=None, keep_attrs=False, + def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs): """Reduce this array by applying `func` along some dimension(s). @@ -497,10 +502,10 @@ def reduce(self, func, dimension=None, axis=None, keep_attrs=False, Function which can be called in the form `func(x, axis=axis, **kwargs)` to return the result of reducing an np.ndarray over an integer valued axis. - dimension : str or sequence of str, optional + dim : str or sequence of str, optional Dimension(s) over which to apply `func`. axis : int or sequence of int, optional - Axis(es) over which to apply `func`. Only one of the 'dimension' + Axis(es) over which to apply `func`. Only one of the 'dim' and 'axis' arguments can be supplied. If neither are supplied, then the reduction is calculated over the flattened array (by calling `func(x)` without an axis argument). @@ -517,27 +522,29 @@ def reduce(self, func, dimension=None, axis=None, keep_attrs=False, Array with summarized data and the indicated dimension(s) removed. 
""" + if 'dimension' in kwargs and dim is None: + dim = kwargs.pop('dimension') + utils.alias_warning('dimension', 'dim') - if dimension is not None and axis is not None: - raise ValueError("cannot supply both 'axis' and 'dimension' " - "arguments") + if dim is not None and axis is not None: + raise ValueError("cannot supply both 'axis' and 'dim' arguments") - if dimension is not None: - axis = self.get_axis_num(dimension) + if dim is not None: + axis = self.get_axis_num(dim) data = func(self.values, axis=axis, **kwargs) removed_axes = (range(self.ndim) if axis is None else np.atleast_1d(axis) % self.ndim) - dims = [dim for n, dim in enumerate(self.dimensions) + dims = [dim for n, dim in enumerate(self.dims) if n not in removed_axes] attrs = self.attrs if keep_attrs else {} - return Variable(dims, data, attributes=attrs) + return Variable(dims, data, attrs=attrs) @classmethod - def concat(cls, variables, dimension='stacked_dimension', - indexers=None, length=None, shortcut=False): + def concat(cls, variables, dim='concat_dim', indexers=None, length=None, + shortcut=False): """Concatenate variables along a new or existing dimension. Parameters @@ -546,7 +553,7 @@ def concat(cls, variables, dimension='stacked_dimension', Arrays to stack together. Each variable is expected to have matching dimensions and shape except for along the stacked dimension. - dimension : str or DataArray, optional + dim : str or DataArray, optional Name of the dimension to stack along. This can either be a new dimension name, in which case it is added along axis=0, or an existing dimension name, in which case the location of the @@ -575,17 +582,17 @@ def concat(cls, variables, dimension='stacked_dimension', Concatenated Variable formed by stacking all the supplied variables along the given dimension. 
""" - if not isinstance(dimension, basestring): - length = dimension.size - dimension, = dimension.dimensions + if not isinstance(dim, basestring): + length = dim.size + dim, = dim.dims if length is None or indexers is None: # so much for lazy evaluation! we need to look at all the variables # to figure out the indexers and/or dimensions of the stacked # variable variables = list(variables) - steps = [var.shape[var.get_axis_num(dimension)] - if dimension in var.dimensions else 1 + steps = [var.shape[var.get_axis_num(dim)] + if dim in var.dims else 1 for var in variables] if length is None: length = sum(steps) @@ -598,35 +605,35 @@ def concat(cls, variables, dimension='stacked_dimension', if i != length: raise ValueError('actual length of stacked variables ' 'along %s is %r but expected length was ' - '%s' % (dimension, i, length)) + '%s' % (dim, i, length)) # initialize the stacked variable with empty data from . import groupby first_var, variables = groupby.peek_at(variables) - if dimension in first_var.dimensions: - axis = first_var.get_axis_num(dimension) + if dim in first_var.dims: + axis = first_var.get_axis_num(dim) shape = tuple(length if n == axis else s for n, s in enumerate(first_var.shape)) - dims = first_var.dimensions + dims = first_var.dims else: axis = 0 shape = (length,) + first_var.shape - dims = (dimension,) + first_var.dimensions + dims = (dim,) + first_var.dims concatenated = cls(dims, np.empty(shape, dtype=first_var.dtype)) concatenated.attrs.update(first_var.attrs) - alt_dims = tuple(d for d in dims if d != dimension) + alt_dims = tuple(d for d in dims if d != dim) # copy in the data from the variables for var, indexer in izip(variables, indexers): if not shortcut: # do sanity checks & attributes clean-up - if dimension in var.dimensions: - # transpose verifies that the dimensions are equivalent - if var.dimensions != concatenated.dimensions: - var = var.transpose(*concatenated.dimensions) - elif var.dimensions != alt_dims: + if dim in 
var.dims: + # transpose verifies that the dims are equivalent + if var.dims != concatenated.dims: + var = var.transpose(*concatenated.dims) + elif var.dims != alt_dims: raise ValueError('inconsistent dimensions') utils.remove_incompatible_items(concatenated.attrs, var.attrs) @@ -652,7 +659,7 @@ def equals(self, other): """ other = getattr(other, 'variable', other) try: - return (self.dimensions == other.dimensions + return (self.dims == other.dims and self._data_equals(other)) except (TypeError, AttributeError): return False @@ -667,13 +674,13 @@ def identical(self, other): return False def __array_wrap__(self, obj, context=None): - return Variable(self.dimensions, obj) + return Variable(self.dims, obj) @staticmethod def _unary_op(f): @functools.wraps(f) def func(self, *args, **kwargs): - return Variable(self.dimensions, f(self.values, *args, **kwargs)) + return Variable(self.dims, f(self.values, *args, **kwargs)) return func @staticmethod @@ -694,7 +701,7 @@ def _inplace_binary_op(f): @functools.wraps(f) def func(self, other): self_data, other_data, dims = _broadcast_variable_data(self, other) - if dims != self.dimensions: + if dims != self.dims: raise ValueError('dimensions cannot change for in-place ' 'operations') self.values = f(self_data, other_data) @@ -717,13 +724,13 @@ class Coordinate(Variable): """ _cache_data_class = PandasIndexAdapter - def __init__(self, name, data, attributes=None, encoding=None): + def __init__(self, name, data, attrs=None, encoding=None): if isinstance(data, pd.MultiIndex): raise NotImplementedError( 'no support yet for using a pandas.MultiIndex in an ' 'xray.Coordinate') - super(Coordinate, self).__init__(name, data, attributes, encoding) + super(Coordinate, self).__init__(name, data, attrs, encoding) if self.ndim != 1: raise ValueError('%s objects must be 1-dimensional' % type(self).__name__) @@ -733,8 +740,7 @@ def __getitem__(self, key): if not hasattr(values, 'ndim') or values.ndim == 0: return Variable((), values, self.attrs, 
self.encoding) else: - return type(self)(self.dimensions, values, self.attrs, - self.encoding) + return type(self)(self.dims, values, self.attrs, self.encoding) def __setitem__(self, key, value): raise TypeError('%s values cannot be modified' % type(self).__name__) @@ -748,7 +754,7 @@ def copy(self, deep=True): # there is no need to copy the index values here even if deep=True # since pandas.Index objects are immutable data = PandasIndexAdapter(self) if deep else self._data - return type(self)(self.dimensions, data, self.attrs, self.encoding) + return type(self)(self.dims, data, self.attrs, self.encoding) def _data_equals(self, other): return self.as_index.equals(other.as_index) @@ -761,7 +767,7 @@ def to_coord(self): @property def name(self): - return self.dimensions[0] + return self.dims[0] @name.setter def name(self, value): @@ -811,33 +817,32 @@ def broadcast_variables(first, second): """ # TODO: add unit tests specifically for this function # validate dimensions - dim_lengths = dict(zip(first.dimensions, first.shape)) - for k, v in zip(second.dimensions, second.shape): + dim_lengths = dict(zip(first.dims, first.shape)) + for k, v in zip(second.dims, second.shape): if k in dim_lengths and dim_lengths[k] != v: raise ValueError('operands could not be broadcast together ' 'with mismatched lengths for dimension %r: %s' % (k, (dim_lengths[k], v))) - for dimensions in [first.dimensions, second.dimensions]: - if len(set(dimensions)) < len(dimensions): + for dims in [first.dims, second.dims]: + if len(set(dims)) < len(dims): raise ValueError('broadcasting requires that neither operand ' - 'has duplicate dimensions: %r' - % list(dimensions)) + 'has duplicate dimensions: %r' % list(dims)) # build dimensions for new Array - second_only_dims = [d for d in second.dimensions - if d not in first.dimensions] - dimensions = list(first.dimensions) + second_only_dims + second_only_dims = [d for d in second.dims + if d not in first.dims] + dims = list(first.dims) + 
second_only_dims # expand first_data's dimensions so it's broadcast compatible after # adding second's dimensions at the end first_data = first.values[(Ellipsis,) + (None,) * len(second_only_dims)] - new_first = Variable(dimensions, first_data, first.attrs, first.encoding) + new_first = Variable(dims, first_data, first.attrs, first.encoding) # expand and reorder second_data so the dimensions line up - first_only_dims = [d for d in dimensions if d not in second.dimensions] - second_dims = list(second.dimensions) + first_only_dims + first_only_dims = [d for d in dims if d not in second.dims] + second_dims = list(second.dims) + first_only_dims second_data = second.values[(Ellipsis,) + (None,) * len(first_only_dims)] new_second = Variable(second_dims, second_data, second.attrs, - second.encoding).transpose(*dimensions) + second.encoding).transpose(*dims) return new_first, new_second @@ -845,15 +850,15 @@ def _broadcast_variable_data(self, other): if isinstance(other, xray.Dataset): raise TypeError('datasets do not support mathematical operations') elif all(hasattr(other, attr) for attr - in ['dimensions', 'values', 'shape', 'encoding']): + in ['dims', 'values', 'shape', 'encoding']): # `other` satisfies the necessary Variable API for broadcast_variables new_self, new_other = broadcast_variables(self, other) self_data = new_self.values other_data = new_other.values - dimensions = new_self.dimensions + dims = new_self.dims else: # rely on numpy broadcasting rules self_data = self.values other_data = other - dimensions = self.dimensions - return self_data, other_data, dimensions + dims = self.dims + return self_data, other_data, dims