diff --git a/.travis.yml b/.travis.yml index cd8bae3291b..74d934b0252 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ matrix: fast_finish: true include: - python: 2.6 - env: UPDATE_ENV="conda install unittest2" + env: UPDATE_ENV="conda install unittest2 pandas==0.13.1" # Test on Python 2.7 with and without netCDF4/scipy - python: 2.7 env: UPDATE_ENV="" diff --git a/xray/backends/netCDF4_.py b/xray/backends/netCDF4_.py index 25d7171377c..5458fafd48d 100644 --- a/xray/backends/netCDF4_.py +++ b/xray/backends/netCDF4_.py @@ -37,14 +37,6 @@ def __getitem__(self, key): return data -def _version_check(actual, required): - actual_tup = tuple(int(p) if p.isdigit() else p for p in actual.split('.')) - try: - return actual_tup >= required - except TypeError: - return True - - def _nc4_values_and_dtype(variable): if variable.dtype.kind in ['i', 'u', 'f'] or variable.dtype == 'S1': values = variable.values @@ -96,11 +88,6 @@ class NetCDF4DataStore(AbstractWritableDataStore): def __init__(self, filename, mode='r', clobber=True, diskless=False, persist=False, format='NETCDF4', group=None): import netCDF4 as nc4 - if not _version_check(nc4.__version__, (1, 0, 6)): - warnings.warn('python-netCDF4 %s detected; ' - 'the minimal recommended version is 1.0.6.' - % nc4.__version__, ImportWarning) - ds = nc4.Dataset(filename, mode=mode, clobber=clobber, diskless=diskless, persist=persist, format=format) diff --git a/xray/core/common.py b/xray/core/common.py index 3f18ecaa6fc..f8396dc26f1 100644 --- a/xray/core/common.py +++ b/xray/core/common.py @@ -117,8 +117,7 @@ def __contains__(self, key): return key in self._data.dims def __repr__(self): - return '\n'.join(formatting.wrap_indent(repr(v.to_index()), '%s: ' % k) - for k, v in self.items()) + return formatting.coords_repr(self) @staticmethod def _convert_to_coord(key, value, expected_size=None): diff --git a/xray/core/dataarray.py b/xray/core/dataarray.py index 105a16d945f..8de81e1b02f 100644 --- a/xray/core/dataarray.py +++ b/xray/core/dataarray.py @@ -292,8 +292,9 @@ def values(self): def values(self, value): self.variable.values = value + @property def _in_memory(self): - return self.variable._in_memory() + return self.variable._in_memory @property def as_index(self): diff --git a/xray/core/formatting.py b/xray/core/formatting.py index 6ba8db12be4..1c421baeb97 100644 --- a/xray/core/formatting.py +++ b/xray/core/formatting.py @@ -1,13 +1,10 @@ -from .pycompat import iteritems +from datetime import datetime +import itertools +import numpy as np +import pandas as pd -def _summarize_attributes(data): - if data.attrs: - attr_summary = '\n'.join(' %s: %s' % (k, v) for k, v - in iteritems(data.attrs)) - else: - attr_summary = ' Empty' - return attr_summary +from .pycompat import iteritems, itervalues, unicode_type, bytes_type def wrap_indent(text, start='', length=None): @@ -17,7 +14,101 @@ def wrap_indent(text, start='', length=None): return start + indent.join(x for x in text.splitlines()) +def _get_indexer_at_least_n_items(shape, n_desired): + assert 0 < n_desired <= np.prod(shape) + cum_items = np.cumprod(shape[::-1]) + n_steps = np.argmax(cum_items >= n_desired) + stop = int(np.ceil(float(n_desired) / np.r_[1, cum_items][n_steps])) + indexer = ((0,) * (len(shape) - 1 - n_steps) + (slice(stop),) + + (slice(None),) * n_steps) + return indexer + + +def first_n_items(x, n_desired): + """Returns the first n_desired items of an array""" + # Unfortunately, we can't just do x.flat[:n_desired] here because x might + # not be a numpy.ndarray. Moreover, access to elements of x could be very + # expensive (e.g. if it's only available over DAP), so go out of our way to + # get them in a single call to __getitem__ using only slices. + if n_desired < 1: + raise ValueError('must request at least one item') + if n_desired < x.size: + indexer = _get_indexer_at_least_n_items(x.shape, n_desired) + x = x[indexer] + return np.asarray(x).flat[:n_desired] + + +def format_item(x): + """Returns a succinct summary of an object as a string""" + if isinstance(x, (np.datetime64, datetime)): + date_str, time_str = str(pd.Timestamp(x)).split() + if time_str == '00:00:00': + return date_str + else: + return '%sT%s' % (date_str, time_str) + elif isinstance(x, (unicode_type, bytes_type)): + return repr(x) + elif isinstance(x, (float, np.float)): + return '{0:.4}'.format(x) + else: + return str(x) + + +def format_array_flat(items_ndarray, max_width): + """Return a formatted string for as many items in the flattened version of + items_ndarray that will fit within max_width characters + """ + # every item will take up at least two characters + max_possibly_relevant = int(np.ceil(max_width / 2.0)) + relevant_items = first_n_items(items_ndarray, max_possibly_relevant) + pprint_items = list(map(format_item, relevant_items)) + + end_padding = ' ...' + + cum_len = np.cumsum([len(s) + 1 for s in pprint_items]) + gt_max_width = cum_len > (max_width - len(end_padding)) + if not gt_max_width.any(): + num_to_print = len(pprint_items) + else: + num_to_print = max(np.argmax(gt_max_width) - 1, 1) + + pprint_str = ' '.join(itertools.islice(pprint_items, int(num_to_print))) + remaining_chars = max_width - len(pprint_str) - len(end_padding) + if remaining_chars > 0 and num_to_print < items_ndarray.size: + pprint_str += end_padding + return pprint_str + + +def summarize_var(name, var, first_col_width, max_width=100, show_values=True): + first_col = pretty_print(' %s ' % name, first_col_width) + dims_str = '(%s) ' % ', '.join(map(str, var.dims)) if var.dims else '' + front_str = first_col + dims_str + ('%s ' % var.dtype) + if show_values: + # print '%s: showing values' % name + values_str = format_array_flat(var, max_width - len(front_str)) + else: + values_str = '...' + return front_str + values_str + + +def coords_repr(coords): + col_width = (max(len(str(k)) for k in coords) if coords else 0) + 5 + summary = ['Coordinates:'] + summary.extend(summarize_var(k, v, col_width) for k, v in coords.items()) + return '\n'.join(summary) + + +def _summarize_attributes(data, indent=' '): + if data.attrs: + attr_summary = '\n'.join('%s%s: %s' % (indent, k, v) for k, v + in iteritems(data.attrs)) + else: + attr_summary = indent + 'Empty' + return attr_summary + + def array_repr(arr): + # used for DataArray, Variable and Coordinate if hasattr(arr, 'name') and arr.name is not None: name_str = '%r ' % arr.name else: @@ -25,14 +116,13 @@ def array_repr(arr): dim_summary = ', '.join('%s: %s' % (k, v) for k, v in zip(arr.dims, arr.shape)) summary = [''% (type(arr).__name__, name_str, dim_summary)] - if arr.size < 1e5 or arr._in_memory(): + if arr.size < 1e5 or arr._in_memory: summary.append(repr(arr.values)) else: summary.append('[%s values with dtype=%s]' % (arr.size, arr.dtype)) - if hasattr(arr, 'dataset'): + if hasattr(arr, 'coords'): if arr.coords: - summary.append('Coordinates:') - summary.append(wrap_indent(repr(arr.coords), ' ')) + summary.append(repr(arr.coords)) other_vars = [k for k in arr.dataset if k not in arr.coords and k != arr.name] if other_vars: @@ -54,45 +144,28 @@ def pretty_print(x, numchars): return s + ' ' * (numchars - len(s)) -def dataset_repr(ds): +def dataset_repr(ds, preview_all_values=False): summary = ['' % type(ds).__name__] - max_name_length = max(len(k) for k in ds.variables) if ds else 0 - first_col_width = max(4 + max_name_length, 16) + max_name_length = max(len(str(k)) for k in ds.variables) if ds else 0 + first_col_width = max(5 + max_name_length, 16) coords_str = pretty_print('Dimensions:', first_col_width) all_dim_strings = ['%s: %s' % (k, v) for k, v in iteritems(ds.dims)] summary.append('%s(%s)' % (coords_str, ', '.join(all_dim_strings))) - def summarize_var(k, not_found=' ', found=int): - v = ds.variables[k] - dim_strs = [] - for n, d in enumerate(ds.dims): - length = len(all_dim_strings[n]) - prepend = ' ' * (length // 2) - if d in v.dims: - if found is int: - indicator = str(v.dims.index(d)) - else: - indicator = found - else: - indicator = not_found - dim_strs.append(pretty_print(prepend + indicator, length)) - string = pretty_print(' ' + k, first_col_width) + ' ' - string += ' '.join(dim_strs) - return string - - def summarize_variables(variables, not_found=' ', found=int): - if variables: - return [summarize_var(k, not_found, found) for k in variables] - else: - return [' None'] + def summarize_variables(variables, always_show_values): + return ([summarize_var(v.name, v, first_col_width, + show_values=(always_show_values or v._in_memory)) + for v in itervalues(variables)] + or [' Empty']) summary.append('Coordinates:') - summary.extend(summarize_variables(ds.coords, ' ', 'X')) + summary.extend(summarize_variables(ds.coords, always_show_values=True)) summary.append('Noncoordinates:') - summary.extend(summarize_variables(ds.noncoords, ' ', int)) + summary.extend(summarize_variables( + ds.noncoords, always_show_values=preview_all_values)) - summary.append('Attributes:\n%s' % _summarize_attributes(ds)) + summary.append('Attributes:\n%s' % _summarize_attributes(ds, ' ')) return '\n'.join(summary) diff --git a/xray/core/variable.py b/xray/core/variable.py index 0fb4c91f1f5..bb35b69e6d0 100644 --- a/xray/core/variable.py +++ b/xray/core/variable.py @@ -236,6 +236,7 @@ def ndim(self): def __len__(self): return len(self._data) + @property def _in_memory(self): return isinstance(self._data, (NumpyArrayAdapter, PandasIndexAdapter)) diff --git a/xray/test/test_backends.py b/xray/test/test_backends.py index 688bc4be05e..0dee99deb30 100644 --- a/xray/test/test_backends.py +++ b/xray/test/test_backends.py @@ -80,11 +80,11 @@ def test_load_data(self): def assert_loads(vars=None): with self.roundtrip(expected) as actual: for v in actual.variables.values(): - self.assertFalse(v._in_memory()) + self.assertFalse(v._in_memory) yield actual for k, v in actual.variables.items(): if vars is None or k in vars: - self.assertTrue(v._in_memory()) + self.assertTrue(v._in_memory) self.assertDatasetAllClose(expected, actual) with self.assertRaises(AssertionError): diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py index 86dd9fb8b93..abfd723de0e 100644 --- a/xray/test/test_dataarray.py +++ b/xray/test/test_dataarray.py @@ -21,18 +21,17 @@ def test_repr(self): v = Variable(['time', 'x'], [[1, 2, 3], [4, 5, 6]], {'foo': 'bar'}) data_array = Dataset({'my_variable': v, 'other': ([], 0)} )['my_variable'] - expected = dedent(""" + expected = dedent("""\ array([[1, 2, 3], [4, 5, 6]]) Coordinates: - time: Int64Index([0, 1], dtype='int64') - x: Int64Index([0, 1, 2], dtype='int64') + time (time) int64 0 1 + x (x) int64 0 1 2 Linked dataset variables: other Attributes: - foo: bar - """).strip() + foo: bar""") self.assertEqual(expected, repr(data_array)) def test_properties(self): @@ -310,8 +309,9 @@ def test_coords(self): da.coords['foo'] expected = dedent("""\ - x: Int64Index([-1, -2], dtype='int64') - y: Int64Index([0, 1, 2], dtype='int64')""") + Coordinates: + x (x) int64 -1 -2 + y (y) int64 0 1 2""") actual = repr(da.coords) self.assertEquals(expected, actual) diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py index 765b151a8dc..e8c2fafc989 100644 --- a/xray/test/test_dataset.py +++ b/xray/test/test_dataset.py @@ -70,36 +70,37 @@ def store_variables(self): class TestDataset(TestCase): def test_repr(self): - data = create_test_data() - expected = dedent(""" + data = create_test_data(seed=123) + # need to insert str dtype at runtime to handle both Python 2 & 3 + expected = dedent("""\ Dimensions: (dim1: 100, dim2: 50, dim3: 10, time: 20) Coordinates: - dim1 X - dim2 X - dim3 X - time X + dim1 (dim1) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 ... + dim2 (dim2) float64 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 ... + dim3 (dim3) %s 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' + time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04 ... Noncoordinates: - var1 0 1 - var2 0 1 - var3 1 0 + var1 (dim1, dim2) float64 -1.086 0.9973 0.283 -1.506 -0.5786 1.651 -2.427 ... + var2 (dim1, dim2) float64 0.3188 1.511 -1.137 0.6425 -1.128 -0.5536 -0.9695 ... + var3 (dim3, dim1) float64 -1.241 -0.3129 -0.8489 2.378 0.6575 0.2131 -0.491 ... Attributes: - Empty - """).strip() + Empty""") % data['dim3'].dtype actual = '\n'.join(x.rstrip() for x in repr(data).split('\n')) + print(actual) self.assertEqual(expected, actual) - expected = dedent(""" + expected = dedent("""\ Dimensions: () Coordinates: - None + Empty Noncoordinates: - None - Attributes: Empty - """).strip() + Attributes: + Empty""") actual = '\n'.join(x.rstrip() for x in repr(Dataset()).split('\n')) + print(actual) self.assertEqual(expected, actual) def test_constructor(self): @@ -178,8 +179,9 @@ def test_coords_properties(self): data.coords[0] expected = dedent("""\ - x: Int64Index([-1, -2], dtype='int64') - y: Int64Index([0, 1, 2], dtype='int64')""") + Coordinates: + x (x) int64 -1 -2 + y (y) int64 0 1 2""") actual = repr(data.coords) self.assertEquals(expected, actual) diff --git a/xray/test/test_formatting.py b/xray/test/test_formatting.py new file mode 100644 index 00000000000..489ac192e37 --- /dev/null +++ b/xray/test/test_formatting.py @@ -0,0 +1,68 @@ +import numpy as np +import pandas as pd + +from xray.core import formatting +from xray.core.pycompat import PY3 + +from . import TestCase + + +class TestFormatting(TestCase): + + def test_get_indexer_at_least_n_items(self): + cases = [ + ((20,), (slice(10),)), + ((3, 20,), (0, slice(10))), + ((2, 10,), (0, slice(10))), + ((2, 5,), (slice(2), slice(None))), + ((1, 2, 5,), (0, slice(2), slice(None))), + ((2, 3, 5,), (0, slice(2), slice(None))), + ((1, 10, 1,), (0, slice(10), slice(None))), + ((2, 5, 1,), (slice(2), slice(None), slice(None))), + ((2, 5, 3,), (0, slice(4), slice(None))), + ((2, 3, 3,), (slice(2), slice(None), slice(None))), + ] + for shape, expected in cases: + actual = formatting._get_indexer_at_least_n_items(shape, 10) + self.assertEqual(expected, actual) + + def test_first_n_items(self): + array = np.arange(100).reshape(10, 5, 2) + for n in [3, 10, 13, 100, 200]: + actual = formatting.first_n_items(array, n) + expected = array.flat[:n] + self.assertItemsEqual(expected, actual) + + with self.assertRaisesRegexp(ValueError, 'at least one item'): + formatting.first_n_items(array, 0) + + def test_format_item(self): + cases = [ + (pd.Timestamp('2000-01-01T12'), '2000-01-01T12:00:00'), + (pd.Timestamp('2000-01-01'), '2000-01-01'), + ('foo', "'foo'"), + (u'foo', "'foo'" if PY3 else "u'foo'"), + (b'foo', "b'foo'" if PY3 else "'foo'"), + (1, '1'), + (1.0, '1.0'), + ] + for item, expected in cases: + actual = formatting.format_item(item) + self.assertEqual(expected, actual) + + def format_array_flat(self): + actual = formatting.format_array_flat(np.arange(100), 10), + expected = '0 1 2 3 4 ...' + self.assertEqual(expected, actual) + + actual = formatting.format_array_flat(np.arange(100.0), 10), + expected = '0.0 1.0 ...' + self.assertEqual(expected, actual) + + actual = formatting.format_array_flat(np.arange(100.0), 1), + expected = '0.0 ...' + self.assertEqual(expected, actual) + + actual = formatting.format_array_flat(np.arange(3), 5), + expected = '0 1 2' + self.assertEqual(expected, actual)