
Commit 5c056c6

Deprecated 'attributes' in favor of 'attrs'
Also:

1. Don't try to preserve attributes under mathematical operations.
2. Finish up some cleanup related to "equals" and "identical" for testing.
3. Add options for how strictly to compare variables when merging or concatenating (see #25).

Fixes #103 and #104.
1 parent 9744aaf · commit 5c056c6

17 files changed: +339 −350 lines
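On the rename itself: below is a minimal sketch of how a deprecated property alias of this kind is commonly written in Python. It is an illustration only, not this commit's actual implementation; the class name, the internal `_attrs` dictionary, and the choice of `FutureWarning` are assumptions.

    import warnings

    class Variable(object):
        # Illustrative stand-in for xray's Variable/Dataset classes.
        def __init__(self, attrs=None):
            self._attrs = dict(attrs or {})  # hypothetical internal storage

        @property
        def attrs(self):
            # The supported spelling going forward.
            return self._attrs

        @property
        def attributes(self):
            # Deprecated alias: warn, then delegate to the new name so
            # existing code keeps working during the transition.
            warnings.warn("'attributes' has been deprecated; use 'attrs'",
                          FutureWarning, stacklevel=2)
            return self.attrs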

README.md

Lines changed: 6 additions & 6 deletions

@@ -26,7 +26,7 @@ makes many powerful array operations possible:
 - Database-like alignment based on coordinate labels that smoothly
   handles missing values: `x, y = xray.align(x, y, join='outer')`.
 - Keep track of arbitrary metadata in the form of a Python dictionary:
-  `x.attributes`.
+  `x.attrs`.

 **xray** aims to provide a data analysis toolkit as powerful as
 [pandas][pandas] but designed for working with homogeneous N-dimensional
@@ -103,7 +103,7 @@ several limitations that led us to build xray instead of extending Iris:
    attempts to build all functionality (`Coord` supports a much more
    limited set of functionality). xray has its equivalent of the Cube
    (the `DataArray` object), but under the hood it is only a thin wrapper
-   on the more primitive building blocks of Dataset and XArray objects.
+   on the more primitive building blocks of Dataset and Variable objects.
 2. Iris has a strict interpretation of [CF conventions][cf], which,
    although a principled choice, we have found to be impractical for
    everyday uses. With Iris, every quantity has physical (SI) units, all
@@ -145,10 +145,10 @@ labeled numpy arrays that provided some guidance for the design of xray.
   enough. The goal is to be as fast as pandas or raw numpy.
 - Provide a uniform API for loading and saving scientific data in a variety
   of formats (including streaming data).
-- Understand metadata according to [Climate and Forecast Conventions][cf]
-  when appropriate, but don't strictly enforce them. Conflicting attributes
-  (e.g., units) should be silently dropped instead of causing errors. The
-  onus is on the user to make sure that operations make sense.
+- Take a pragmatic approach to metadata (attributes), and be very cautious
+  before implementing any functionality that relies on it. Automatically
+  maintaining attributes is tricky and very hard to get right (see
+  discussion about Iris above).

 ## Getting started
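Point 1 of the commit message follows directly from this pragmatic stance: arithmetic no longer tries to carry metadata along. A short sketch of the intended behavior, assuming the post-commit API; the 'units' attribute is made up for illustration:

    import xray

    ds = xray.Dataset({'x': ('t', [1.0, 2.0, 3.0])})
    ds['x'].attrs['units'] = 'm'
    doubled = ds['x'] * 2
    # attributes are deliberately not propagated through math operations,
    # since xray cannot know whether 'units' would still be meaningful
    print(ds['x'].attrs)   # {'units': 'm'}
    print(doubled.attrs)   # expected to be empty after this commit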

doc/api.rst

Lines changed: 2 additions & 2 deletions

@@ -25,7 +25,7 @@ Attributes and underlying data
    Dataset.coordinates
    Dataset.noncoordinates
    Dataset.dimensions
-   Dataset.attributes
+   Dataset.attrs

 Dataset contents
 ~~~~~~~~~~~~~~~~
@@ -112,7 +112,7 @@ Attributes and underlying data
    DataArray.coordinates
    DataArray.name
    DataArray.dataset
-   DataArray.attributes
+   DataArray.attrs

 Selecting
 ~~~~~~~~~
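Both renamed entries behave like Python dictionaries, consistent with the README's description of metadata. A small usage sketch (the dataset contents and attribute names are invented for illustration):

    import xray

    ds = xray.Dataset({'x': ('t', [1, 2, 3])})
    ds.attrs['title'] = 'toy example'    # Dataset.attrs
    ds['x'].attrs['units'] = 'meters'    # DataArray.attrs
    ds.attrs.clear()                     # supports the usual dict methods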

test/__init__.py

Lines changed: 40 additions & 14 deletions

@@ -1,5 +1,6 @@
 import unittest

+import numpy as np
 from numpy.testing import assert_array_equal

 from xray import utils, DataArray
@@ -36,12 +37,26 @@ def requires_netCDF4(test):
     return test if has_netCDF4 else unittest.skip('requires netCDF4')(test)


+def data_allclose_or_equiv(arr1, arr2, rtol=1e-05, atol=1e-08):
+    exact_dtypes = [np.datetime64, np.timedelta64, np.string_]
+    if any(any(np.issubdtype(arr.dtype, t) for t in exact_dtypes)
+           or arr.dtype == object for arr in [arr1, arr2]):
+        return np.array_equal(arr1, arr2)
+    else:
+        return utils.allclose_or_equiv(arr1, arr2, rtol=rtol, atol=atol)
+
+
 class TestCase(unittest.TestCase):
     def assertVariableEqual(self, v1, v2):
         self.assertTrue(as_variable(v1).equals(v2))

+    def assertVariableIdentical(self, v1, v2):
+        self.assertTrue(as_variable(v1).identical(v2))
+
     def assertVariableAllClose(self, v1, v2, rtol=1e-05, atol=1e-08):
-        self.assertTrue(utils.variable_allclose(v1, v2, rtol=rtol, atol=atol))
+        self.assertEqual(v1.dimensions, v2.dimensions)
+        self.assertTrue(data_allclose_or_equiv(v1.values, v2.values,
+                                               rtol=rtol, atol=atol))

     def assertVariableNotEqual(self, v1, v2):
         self.assertFalse(as_variable(v1).equals(v2))
@@ -52,36 +67,47 @@ def assertArrayEqual(self, a1, a2):
     def assertDatasetEqual(self, d1, d2):
         # this method is functionally equivalent to `assert d1 == d2`, but it
         # checks each aspect of equality separately for easier debugging
-        self.assertTrue(utils.dict_equal(d1.attributes, d2.attributes))
         self.assertEqual(sorted(d1.variables), sorted(d2.variables))
         for k in d1:
             v1 = d1.variables[k]
             v2 = d2.variables[k]
             self.assertVariableEqual(v1, v2)

+    def assertDatasetIdentical(self, d1, d2):
+        # this method is functionally equivalent to `assert d1.identical(d2)`,
+        # but it checks each aspect of equality separately for easier debugging
+        self.assertTrue(utils.dict_equal(d1.attrs, d2.attrs))
+        self.assertEqual(sorted(d1.variables), sorted(d2.variables))
+        for k in d1:
+            v1 = d1.variables[k]
+            v2 = d2.variables[k]
+            self.assertTrue(v1.identical(v2))
+
     def assertDatasetAllClose(self, d1, d2, rtol=1e-05, atol=1e-08):
-        self.assertTrue(utils.dict_equal(d1.attributes, d2.attributes))
         self.assertEqual(sorted(d1.variables), sorted(d2.variables))
         for k in d1:
             v1 = d1.variables[k]
             v2 = d2.variables[k]
             self.assertVariableAllClose(v1, v2, rtol=rtol, atol=atol)

+    def assertCoordsEqual(self, d1, d2):
+        self.assertEqual(sorted(d1.coordinates), sorted(d2.coordinates))
+        for k in d1.coordinates:
+            v1 = d1.coordinates[k]
+            v2 = d2.coordinates[k]
+            self.assertVariableEqual(v1, v2)
+
     def assertDataArrayEqual(self, ar1, ar2):
+        self.assertVariableEqual(ar1, ar2)
+        self.assertCoordsEqual(ar1, ar2)
+
+    def assertDataArrayIdentical(self, ar1, ar2):
         self.assertEqual(ar1.name, ar2.name)
-        self.assertDatasetEqual(ar1.dataset, ar2.dataset)
+        self.assertDatasetIdentical(ar1.dataset, ar2.dataset)

     def assertDataArrayAllClose(self, ar1, ar2, rtol=1e-05, atol=1e-08):
-        self.assertEqual(ar1.name, ar2.name)
-        self.assertDatasetAllClose(ar1.dataset, ar2.dataset,
-                                   rtol=rtol, atol=atol)
-
-    def assertDataArrayEquiv(self, ar1, ar2):
-        self.assertIsInstance(ar1, DataArray)
-        self.assertIsInstance(ar2, DataArray)
-        random_name = 'randomly-renamed-variable'
-        self.assertDataArrayEqual(ar1.rename(random_name),
-                                  ar2.rename(random_name))
+        self.assertVariableAllClose(ar1, ar2, rtol=rtol, atol=atol)
+        self.assertCoordsEqual(ar1, ar2)


 class ReturnItem(object):
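The helpers above encode two tiers of comparison: `equals` checks dimensions and values, while `identical` additionally requires matching attributes (and, for a DataArray, the name). A self-contained sketch of the distinction, with an invented VariableSketch class standing in for xray's real Variable:

    import numpy as np

    class VariableSketch(object):
        """Illustrative stand-in for xray's two comparison tiers."""
        def __init__(self, dimensions, values, attrs=None):
            self.dimensions = tuple(dimensions)
            self.values = np.asarray(values)
            self.attrs = dict(attrs or {})

        def equals(self, other):
            # weaker tier: same dimensions and same values
            return (self.dimensions == other.dimensions
                    and np.array_equal(self.values, other.values))

        def identical(self, other):
            # stronger tier: `equals` plus matching metadata
            return self.equals(other) and self.attrs == other.attrs

    a = VariableSketch(('t',), [1, 2], {'units': 'm'})
    b = VariableSketch(('t',), [1, 2], {'units': 'km'})
    assert a.equals(b) and not a.identical(b)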

test/test_backends.py

Lines changed: 9 additions & 11 deletions

@@ -66,7 +66,7 @@ def test_roundtrip_test_data(self):
     def test_roundtrip_string_data(self):
         expected = Dataset({'x': ('t', ['abc', 'def'])})
         actual = self.roundtrip(expected)
-        self.assertDatasetEqual(expected, actual)
+        self.assertDatasetIdentical(expected, actual)

     def test_roundtrip_mask_and_scale(self):
         decoded = create_masked_and_scaled_data()
@@ -81,7 +81,7 @@ def test_roundtrip_mask_and_scale(self):
     def test_roundtrip_example_1_netcdf(self):
         expected = open_example_dataset('example_1.nc')
         actual = self.roundtrip(expected)
-        self.assertDatasetEqual(expected, actual)
+        self.assertDatasetIdentical(expected, actual)

     def test_orthogonal_indexing(self):
         in_memory = create_test_data()
@@ -98,7 +98,7 @@ def test_orthogonal_indexing(self):
     def test_pickle(self):
         on_disk = open_example_dataset('bears.nc')
         unpickled = pickle.loads(pickle.dumps(on_disk))
-        self.assertDatasetEqual(on_disk, unpickled)
+        self.assertDatasetIdentical(on_disk, unpickled)


 @contextlib.contextmanager
@@ -206,7 +206,7 @@ def test_mask_and_scale(self):
         # now check xray
         ds = open_dataset(tmp_file)
         expected = create_masked_and_scaled_data()
-        self.assertDatasetEqual(expected, ds)
+        self.assertDatasetIdentical(expected, ds)

     def test_0dimensional_variable(self):
         # This fix verifies our work-around to this netCDF4-python bug:
@@ -219,7 +219,7 @@ def test_0dimensional_variable(self):

         ds = open_dataset(tmp_file)
         expected = Dataset({'x': ((), 123)})
-        self.assertDatasetEqual(expected, ds)
+        self.assertDatasetIdentical(expected, ds)

     def test_variable_len_strings(self):
         with create_tmp_file() as tmp_file:
@@ -234,7 +234,7 @@ def test_variable_len_strings(self):
             expected = Dataset({'x': ('x', values)})
             for kwargs in [{}, {'decode_cf': True}]:
                 actual = open_dataset(tmp_file, **kwargs)
-                self.assertDatasetEqual(expected, actual)
+                self.assertDatasetIdentical(expected, actual)


 @requires_netCDF4
@@ -251,9 +251,9 @@ def roundtrip(self, data, **kwargs):


 def clear_attributes(ds):
-    ds.attributes.clear()
+    ds.attrs.clear()
     for v in ds.itervalues():
-        v.attributes.clear()
+        v.attrs.clear()


 @requires_netCDF4
@@ -263,7 +263,5 @@ def test_cmp_local_file(self):
         url = 'http://test.opendap.org/opendap/hyrax/data/nc/bears.nc'
         actual = Dataset.load_store(backends.PydapDataStore(url))
         expected = open_example_dataset('bears.nc')
-        # don't check attributes, since pydap decodes the strings improperly
-        for ds in [actual, expected]:
-            clear_attributes(ds)
+        # don't check attributes since pydap doesn't serialize them correctly
         self.assertDatasetEqual(actual, expected)
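The widespread switch from assertDatasetEqual to assertDatasetIdentical strengthens these roundtrip tests: they now fail if serialization loses attributes, not just if it corrupts data. A sketch of the pattern in the style of the tests above, reusing the diff's helper names; the test name and 'title' attribute are invented:

    # hypothetical test illustrating why Identical matters for roundtrips
    def test_roundtrip_preserves_attrs(self):
        expected = Dataset({'x': ('t', [1, 2, 3])})
        expected.attrs['title'] = 'example'   # invented attribute
        actual = self.roundtrip(expected)     # write to disk, re-open
        # assertDatasetEqual would pass even if 'title' were dropped;
        # assertDatasetIdentical will not
        self.assertDatasetIdentical(expected, actual)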
