Skip to content

Commit 120e039

Browse files
authored
Merge pull request #1658 from shoyer/deprecate-Dataset-iter
Add a FutureWarning to Dataset.__iter__ and Dataset.__len__
2 parents 6229b90 + 71db9ca commit 120e039

10 files changed

+94
-50
lines changed

doc/data-structures.rst

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -310,18 +310,15 @@ You can also create an dataset from:
310310
Dataset contents
311311
~~~~~~~~~~~~~~~~
312312

313-
:py:class:`~xarray.Dataset` implements the Python dictionary interface, with
313+
:py:class:`~xarray.Dataset` implements the Python mapping interface, with
314314
values given by :py:class:`xarray.DataArray` objects:
315315

316316
.. ipython:: python
317317
318318
'temperature' in ds
319-
320-
ds.keys()
321-
322319
ds['temperature']
323320
324-
The valid keys include each listed coordinate and data variable.
321+
Valid keys include each listed coordinate and data variable.
325322

326323
Data and coordinate variables are also contained separately in the
327324
:py:attr:`~xarray.Dataset.data_vars` and :py:attr:`~xarray.Dataset.coords`
@@ -356,6 +353,13 @@ setting) variables and attributes:
356353
This is particularly useful in an exploratory context, because you can
357354
tab-complete these variable names with tools like IPython.
358355

356+
.. warning::
357+
358+
We are changing the behavior of iterating over a Dataset in the next major
359+
release of xarray, to only include data variables instead of both data
360+
variables and coordinates. In the meantime, prefer iterating over
361+
``ds.data_vars`` or ``ds.coords``.
362+
359363
Dictionary like methods
360364
~~~~~~~~~~~~~~~~~~~~~~~
361365

doc/whats-new.rst

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,14 +78,23 @@ Breaking changes
7878
disk when calling ``repr`` (:issue:`1522`).
7979
By `Guido Imperiale <https://github.com/crusaderky>`_.
8080

81-
- Deprecations:
81+
- Several existing features have been deprecated and will change to new
82+
behavior in xarray v0.11. If you use any of them with xarray v0.10, you
83+
should see a ``FutureWarning`` that describes how to update your code:
8284

8385
- ``Dataset.T`` has been deprecated as an alias for ``Dataset.transpose()``
84-
(:issue:`1232`).
85-
- ``key in data_array`` currently checks for membership in
86-
``data_array.coords``. This is now deprecated: in the future, it will check
87-
membership in ``data_array.values`` instead.
88-
86+
(:issue:`1232`). In the next major version of xarray, it will provide
87+
shortcut lookup for variables or attributes with name ``'T'``.
88+
- ``DataArray.__contains__`` (e.g., ``key in data_array``) currently checks
89+
for membership in ``DataArray.coords``. In the next major version of
90+
xarray, it will check membership in the array data found in
91+
``DataArray.values`` instead (:issue:`1267`).
92+
- Direct iteration over and counting a ``Dataset`` (e.g., ``[k for k in ds]``,
93+
``ds.keys()``, ``ds.values()``, ``len(ds)`` and ``if ds``) currently
94+
includes all variables, both data and coordinates. For improved usability
95+
and consistency with pandas, in the next major version of xarray these will
96+
change to only include data variables (:issue:`884`). Use ``ds.variables``,
97+
``ds.data_vars`` or ``ds.coords`` as alternatives.
8998

9099
Backward Incompatible Changes
91100
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

xarray/backends/api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def check_name(name):
8686
raise TypeError('DataArray.name or Dataset key must be either a '
8787
'string or None for serialization to netCDF files')
8888

89-
for k in dataset:
89+
for k in dataset.variables:
9090
check_name(k)
9191

9292

xarray/core/dataarray.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1844,7 +1844,7 @@ def dot(self, other):
18441844
new_dims = ([d for d in self.dims if d not in dims] +
18451845
[d for d in other.dims if d not in dims])
18461846

1847-
return type(self)(new_data, new_coords, new_dims)
1847+
return type(self)(new_data, new_coords.variables, new_dims)
18481848

18491849
def sortby(self, variables, ascending=True):
18501850
"""

xarray/core/dataset.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,9 @@ def _set_init_vars_and_dims(self, data_vars, coords, compat):
373373
raise ValueError('variables %r are found in both data_vars and '
374374
'coords' % both_data_and_coords)
375375

376+
if isinstance(coords, Dataset):
377+
coords = coords.variables
378+
376379
variables, coord_names, dims = merge_data_and_coords(
377380
data_vars, coords, compat=compat)
378381

@@ -725,7 +728,7 @@ def _attr_sources(self):
725728
@property
726729
def _item_sources(self):
727730
"""List of places to look-up items for key-completion"""
728-
return [self, {d: self[d] for d in self.dims},
731+
return [self.data_vars, self.coords, {d: self[d] for d in self.dims},
729732
LevelCoordinatesSource(self)]
730733

731734
def __contains__(self, key):
@@ -735,9 +738,31 @@ def __contains__(self, key):
735738
return key in self._variables
736739

737740
def __len__(self):
741+
warnings.warn('calling len() on an xarray.Dataset will change in '
742+
'xarray v0.11 to only include data variables, not '
743+
'coordinates. Call len() on the Dataset.variables '
744+
'property instead, like ``len(ds.variables)``, to '
745+
'preserve existing behavior in a forwards compatible '
746+
'manner.',
747+
FutureWarning, stacklevel=2)
738748
return len(self._variables)
739749

750+
def __bool__(self):
751+
warnings.warn('casting an xarray.Dataset to a boolean will change in '
752+
'xarray v0.11 to only include data variables, not '
753+
'coordinates. Cast the Dataset.variables property '
754+
'instead to preserve existing behavior in a forwards '
755+
'compatible manner.',
756+
FutureWarning, stacklevel=2)
757+
return bool(self._variables)
758+
740759
def __iter__(self):
760+
warnings.warn('iteration over an xarray.Dataset will change in xarray '
761+
'v0.11 to only include data variables, not coordinates. '
762+
'Iterate over the Dataset.variables property instead to '
763+
'preserve existing behavior in a forwards compatible '
764+
'manner.',
765+
FutureWarning, stacklevel=2)
741766
return iter(self._variables)
742767

743768
@property
@@ -2202,8 +2227,7 @@ def transpose(self, *dims):
22022227
@property
22032228
def T(self):
22042229
warnings.warn('xarray.Dataset.T has been deprecated as an alias for '
2205-
'`.transpose()`. It will be removed in a future version '
2206-
'of xarray.',
2230+
'`.transpose()`. It will be removed in xarray v0.11.',
22072231
FutureWarning, stacklevel=2)
22082232
return self.transpose()
22092233

@@ -2476,7 +2500,7 @@ def to_array(self, dim='variable', name=None):
24762500
return DataArray(data, coords, dims, attrs=self.attrs, name=name)
24772501

24782502
def _to_dataframe(self, ordered_dims):
2479-
columns = [k for k in self if k not in self.dims]
2503+
columns = [k for k in self.variables if k not in self.dims]
24802504
data = [self._variables[k].set_dims(ordered_dims).values.reshape(-1)
24812505
for k in columns]
24822506
index = self.coords.to_index(ordered_dims)

xarray/core/formatting.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ def array_repr(arr):
411411
def dataset_repr(ds):
412412
summary = [u'<xarray.%s>' % type(ds).__name__]
413413

414-
col_width = _calculate_col_width(_get_col_items(ds))
414+
col_width = _calculate_col_width(_get_col_items(ds.variables))
415415

416416
dims_start = pretty_print(u'Dimensions:', col_width)
417417
summary.append(u'%s(%s)' % (dims_start, dim_summary(ds)))

xarray/testing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def assert_allclose(a, b, rtol=1e-05, atol=1e-08, decode_bytes=True):
132132
assert allclose, '{}\n{}'.format(a.coords[v].values,
133133
b.coords[v].values)
134134
elif isinstance(a, xr.Dataset):
135-
assert set(a) == set(b)
135+
assert set(a.data_vars) == set(b.data_vars)
136136
assert set(a.coords) == set(b.coords)
137137
for k in list(a.variables) + list(a.coords):
138138
assert_allclose(a[k], b[k], **kwargs)

xarray/tests/test_backends.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ def test_write_store(self):
182182
self.assertDatasetAllClose(expected, actual)
183183

184184
def check_dtypes_roundtripped(self, expected, actual):
185-
for k in expected:
185+
for k in expected.variables:
186186
expected_dtype = expected.variables[k].dtype
187187
if (isinstance(self, Only32BitTypes) and
188188
expected_dtype == 'int64'):
@@ -878,7 +878,7 @@ def test_variable_order(self):
878878
ds.coords['c'] = 4
879879

880880
with self.roundtrip(ds) as actual:
881-
self.assertEqual(list(ds), list(actual))
881+
self.assertEqual(list(ds.variables), list(actual.variables))
882882

883883
def test_unsorted_index_raises(self):
884884
# should be fixed in netcdf4 v1.2.1
@@ -1010,7 +1010,7 @@ def test_roundtrip_example_1_netcdf_gz(self):
10101010
def test_netcdf3_endianness(self):
10111011
# regression test for GH416
10121012
expected = open_example_dataset('bears.nc', engine='scipy')
1013-
for var in expected.values():
1013+
for var in expected.variables.values():
10141014
self.assertTrue(var.dtype.isnative)
10151015

10161016
@requires_netCDF4
@@ -1106,11 +1106,12 @@ def test_cross_engine_read_write_netcdf3(self):
11061106
with open_dataset(tmp_file,
11071107
engine=read_engine) as actual:
11081108
# hack to allow test to work:
1109-
# coord comes back as DataArray rather than coord, and so
1110-
# need to loop through here rather than in the test
1111-
# function (or we get recursion)
1112-
[assert_allclose(data[k].variable, actual[k].variable)
1113-
for k in data]
1109+
# coord comes back as DataArray rather than coord,
1110+
# and so need to loop through here rather than in
1111+
# the test function (or we get recursion)
1112+
[assert_allclose(data[k].variable,
1113+
actual[k].variable)
1114+
for k in data.variables]
11141115

11151116
def test_encoding_unlimited_dims(self):
11161117
ds = Dataset({'x': ('y', np.arange(10.0))})

xarray/tests/test_combine.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def test_concat(self):
1919

2020
# drop the third dimension to keep things relatively understandable
2121
data = create_test_data()
22-
for k in list(data):
22+
for k in list(data.variables):
2323
if 'dim3' in data[k].dims:
2424
del data[k]
2525

xarray/tests/test_dataset.py

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def create_test_data(seed=None):
5151
obj.coords['numbers'] = ('dim3', np.array([0, 1, 2, 0, 0, 1, 1, 2, 2, 3],
5252
dtype='int64'))
5353
obj.encoding = {'foo': 'bar'}
54-
assert all(obj.data.flags.writeable for obj in obj.values())
54+
assert all(obj.data.flags.writeable for obj in obj.variables.values())
5555
return obj
5656

5757

@@ -410,11 +410,16 @@ def test_properties(self):
410410
self.assertIsInstance(ds.dims.mapping, utils.SortedKeysDict)
411411
self.assertIs(type(ds.dims.mapping.mapping), dict)
412412

413-
self.assertItemsEqual(ds, list(ds.variables))
414-
self.assertItemsEqual(ds.keys(), list(ds.variables))
413+
with pytest.warns(FutureWarning):
414+
self.assertItemsEqual(ds, list(ds.variables))
415+
with pytest.warns(FutureWarning):
416+
self.assertItemsEqual(ds.keys(), list(ds.variables))
415417
self.assertNotIn('aasldfjalskdfj', ds.variables)
416418
self.assertIn('dim1', repr(ds.variables))
417-
self.assertEqual(len(ds), 7)
419+
with pytest.warns(FutureWarning):
420+
self.assertEqual(len(ds), 7)
421+
with pytest.warns(FutureWarning):
422+
self.assertEqual(bool(ds), True)
418423

419424
self.assertItemsEqual(ds.data_vars, ['var1', 'var2', 'var3'])
420425
self.assertItemsEqual(ds.data_vars.keys(), ['var1', 'var2', 'var3'])
@@ -470,7 +475,7 @@ def test_variable(self):
470475
self.assertTrue('foo' in a)
471476
a['bar'] = (('time', 'x',), d)
472477
# order of creation is preserved
473-
self.assertEqual(list(a), ['foo', 'bar'])
478+
self.assertEqual(list(a.variables), ['foo', 'bar'])
474479
self.assertArrayEqual(a['foo'].values, d)
475480
# try to add variable with dim (10,3) with data that's (3,10)
476481
with self.assertRaises(ValueError):
@@ -819,7 +824,7 @@ def test_isel(self):
819824
else:
820825
self.assertEqual(data.dims[d], ret.dims[d])
821826
# Verify that the data is what we expect
822-
for v in data:
827+
for v in data.variables:
823828
self.assertEqual(data[v].dims, ret[v].dims)
824829
self.assertEqual(data[v].attrs, ret[v].attrs)
825830
slice_list = [slice(None)] * data[v].values.ndim
@@ -1801,7 +1806,8 @@ def test_drop_variables(self):
18011806

18021807
self.assertDatasetIdentical(data, data.drop([]))
18031808

1804-
expected = Dataset(dict((k, data[k]) for k in data if k != 'time'))
1809+
expected = Dataset(dict((k, data[k]) for k in data.variables
1810+
if k != 'time'))
18051811
actual = data.drop('time')
18061812
self.assertDatasetIdentical(expected, actual)
18071813
actual = data.drop(['time'])
@@ -1848,8 +1854,7 @@ def test_copy(self):
18481854

18491855
for copied in [data.copy(deep=True), deepcopy(data)]:
18501856
self.assertDatasetIdentical(data, copied)
1851-
for k in data:
1852-
v0 = data.variables[k]
1857+
for k, v0 in data.variables.items():
18531858
v1 = copied.variables[k]
18541859
self.assertIsNot(v0, v1)
18551860

@@ -2304,30 +2309,30 @@ def test_setitem_align_new_indexes(self):
23042309

23052310
def test_assign(self):
23062311
ds = Dataset()
2307-
actual = ds.assign(x = [0, 1, 2], y = 2)
2312+
actual = ds.assign(x=[0, 1, 2], y=2)
23082313
expected = Dataset({'x': [0, 1, 2], 'y': 2})
23092314
self.assertDatasetIdentical(actual, expected)
2310-
self.assertEqual(list(actual), ['x', 'y'])
2315+
self.assertEqual(list(actual.variables), ['x', 'y'])
23112316
self.assertDatasetIdentical(ds, Dataset())
23122317

2313-
actual = actual.assign(y = lambda ds: ds.x ** 2)
2318+
actual = actual.assign(y=lambda ds: ds.x ** 2)
23142319
expected = Dataset({'y': ('x', [0, 1, 4]), 'x': [0, 1, 2]})
23152320
self.assertDatasetIdentical(actual, expected)
23162321

2317-
actual = actual.assign_coords(z = 2)
2322+
actual = actual.assign_coords(z=2)
23182323
expected = Dataset({'y': ('x', [0, 1, 4])}, {'z': 2, 'x': [0, 1, 2]})
23192324
self.assertDatasetIdentical(actual, expected)
23202325

23212326
ds = Dataset({'a': ('x', range(3))}, {'b': ('x', ['A'] * 2 + ['B'])})
2322-
actual = ds.groupby('b').assign(c = lambda ds: 2 * ds.a)
2327+
actual = ds.groupby('b').assign(c=lambda ds: 2 * ds.a)
23232328
expected = ds.merge({'c': ('x', [0, 2, 4])})
23242329
self.assertDatasetIdentical(actual, expected)
23252330

2326-
actual = ds.groupby('b').assign(c = lambda ds: ds.a.sum())
2331+
actual = ds.groupby('b').assign(c=lambda ds: ds.a.sum())
23272332
expected = ds.merge({'c': ('x', [1, 1, 2])})
23282333
self.assertDatasetIdentical(actual, expected)
23292334

2330-
actual = ds.groupby('b').assign_coords(c = lambda ds: ds.a.sum())
2335+
actual = ds.groupby('b').assign_coords(c=lambda ds: ds.a.sum())
23312336
expected = expected.set_coords('c')
23322337
self.assertDatasetIdentical(actual, expected)
23332338

@@ -2385,12 +2390,13 @@ def test_setitem_multiindex_level(self):
23852390

23862391
def test_delitem(self):
23872392
data = create_test_data()
2388-
all_items = set(data)
2389-
self.assertItemsEqual(data, all_items)
2393+
all_items = set(data.variables)
2394+
self.assertItemsEqual(data.variables, all_items)
23902395
del data['var1']
2391-
self.assertItemsEqual(data, all_items - set(['var1']))
2396+
self.assertItemsEqual(data.variables, all_items - set(['var1']))
23922397
del data['numbers']
2393-
self.assertItemsEqual(data, all_items - set(['var1', 'numbers']))
2398+
self.assertItemsEqual(data.variables,
2399+
all_items - set(['var1', 'numbers']))
23942400
self.assertNotIn('numbers', data.coords)
23952401

23962402
def test_squeeze(self):
@@ -3586,12 +3592,12 @@ def test_dataset_transpose(self):
35863592

35873593
ds = create_test_data()
35883594
actual = ds.transpose()
3589-
for k in ds:
3595+
for k in ds.variables:
35903596
self.assertEqual(actual[k].dims[::-1], ds[k].dims)
35913597

35923598
new_order = ('dim2', 'dim3', 'dim1', 'time')
35933599
actual = ds.transpose(*new_order)
3594-
for k in ds:
3600+
for k in ds.variables:
35953601
expected_dims = tuple(d for d in new_order if d in ds[k].dims)
35963602
self.assertEqual(actual[k].dims, expected_dims)
35973603

0 commit comments

Comments
 (0)