From 50d15eafcb9856e1a978dd2fe3d123b2fb004c30 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 20 Dec 2016 20:19:27 -0800 Subject: [PATCH 1/6] add attr_info method to dataset, first pass --- doc/whats-new.rst | 4 ++++ xarray/core/dataset.py | 38 ++++++++++++++++++++++++++++++++++ xarray/core/formatting.py | 9 +++++++- xarray/test/test_dataset.py | 37 +++++++++++++++++++++++++++++++++ xarray/test/test_formatting.py | 3 +-- 5 files changed, 88 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 350b472af56..41e8fa7b7e6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -147,6 +147,10 @@ Enhancements plots (:issue:`897`). See :ref:`plotting.figsize` for more details. By `Stephan Hoyer `_ and `Fabien Maussion `_. +- New :py:meth:`~Dataset.attr_info` method to summarize ``Dataset`` variables + and attributes. The method produces a stirng output similar to what the + command line utility ``ncdump -h`` produces (:issue:`1150`). + By `Joe Hamman `_. Bug fixes ~~~~~~~~~ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 9a3e2117d80..b8c9c2236d7 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5,6 +5,8 @@ from collections import Mapping from numbers import Number +import sys + import numpy as np import pandas as pd @@ -802,6 +804,42 @@ def to_netcdf(self, path=None, mode='w', format=None, group=None, def __unicode__(self): return formatting.dataset_repr(self) + def attr_info(self, buf=None): + """ + Concise summary of a Dataset variables and attributes. + Parameters + ---------- + buf : writable buffer, defaults to sys.stdout + + See Also + -------- + pandas.DataFrame.assign + netCDF's ncdump + """ + + if buf is None: # pragma: no cover + buf = sys.stdout + + lines = [] + lines.append('xarray.Dataset {') + lines.append('dimensions:') + for name, size in self.dims.items(): + lines.append('\t{name} = {size} ;'.format(name=name, size=size)) + lines.append('\nvariables:') + for name, da in self.variables.items(): + dims = ', '.join(da.dims) + lines.append('\t{type} {name}({dims}) ;'.format( + type=da.dtype, name=name, dims=dims)) + for k, v in da.attrs.items(): + lines.append('\t\t{name}:{k} = {v} ;'.format(name=name, k=k, + v=v)) + lines.append('\n// global attributes:') + for k, v in self.attrs.items(): + lines.append('\t:{k} = {v} ;'.format(k=k, v=v)) + lines.append('}') + + formatting._put_lines(buf, lines) + @property def chunks(self): """Block dimensions for this dataset's data or None if it's not a dask diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index cdcc4b4e1a5..cc7a3a71041 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -16,7 +16,7 @@ from .options import OPTIONS from .pycompat import ( - PY2, unicode_type, bytes_type, dask_array_type, OrderedDict) + PY2, unicode_type, bytes_type, dask_array_type, OrderedDict, basestring) def pretty_print(x, numchars): @@ -361,6 +361,13 @@ def array_repr(arr): return u'\n'.join(summary) +def _put_lines(buf, lines): + '''see also from pandas.formats.format import _put_lines''' + if any(isinstance(x, basestring) for x in lines): + lines = [basestring(x) for x in lines] + buf.write('\n'.join(lines)) + + def dataset_repr(ds): summary = [u'' % type(ds).__name__] diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index 1a970fe718d..ec8bb3064a7 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -12,6 +12,10 @@ import dask.array as da except ImportError: pass +try: + from io import StringIO +except ImportError: + from cStringIO import StringIO import numpy as np import pandas as pd @@ -190,6 +194,39 @@ def test_unicode_data(self): actual = unicode_type(data) self.assertEqual(expected, actual) + def test_attr_info(self): + data = create_test_data(seed=123) + data.attrs['foo'] = 'bar' + buf = StringIO() + data.attr_info(buf=buf) + + expected = dedent('''\ + xarray.Dataset { + dimensions: + dim1 = 8 ; + dim2 = 9 ; + dim3 = 10 ; + time = 20 ; + + variables: + datetime64[ns] time(time) ; + float64 dim2(dim2) ; + Date: Wed, 21 Dec 2016 14:11:15 -0800 Subject: [PATCH 2/6] fix py2 string bug --- xarray/core/formatting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index cc7a3a71041..752437f814c 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -364,7 +364,7 @@ def array_repr(arr): def _put_lines(buf, lines): '''see also from pandas.formats.format import _put_lines''' if any(isinstance(x, basestring) for x in lines): - lines = [basestring(x) for x in lines] + lines = [ensure_valid_repr(x) for x in lines] buf.write('\n'.join(lines)) From 49bb76c26ab6391181ef4f8a88ef21baa0ab8fbd Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Wed, 21 Dec 2016 15:55:48 -0800 Subject: [PATCH 3/6] change name to ds.info and fix py2 string issue --- doc/api.rst | 1 + doc/whats-new.rst | 6 +++--- xarray/core/dataset.py | 2 +- xarray/core/formatting.py | 5 ++++- xarray/test/test_dataset.py | 16 ++++++++-------- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index c66e61dddf8..5f708200086 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -411,6 +411,7 @@ Dataset methods Dataset.load Dataset.chunk Dataset.filter_by_attrs + Dataset.info DataArray methods ----------------- diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 41e8fa7b7e6..cf80be4a740 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -147,9 +147,9 @@ Enhancements plots (:issue:`897`). See :ref:`plotting.figsize` for more details. By `Stephan Hoyer `_ and `Fabien Maussion `_. -- New :py:meth:`~Dataset.attr_info` method to summarize ``Dataset`` variables - and attributes. The method produces a stirng output similar to what the - command line utility ``ncdump -h`` produces (:issue:`1150`). +- New :py:meth:`~Dataset.info` method to summarize ``Dataset`` variables + and attributes. The method prints to a buffer (e.g. ``stdout``) with output + similar to what the command line utility ``ncdump -h`` produces (:issue:`1150`). By `Joe Hamman `_. Bug fixes diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b8c9c2236d7..cd1e7294b0b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -804,7 +804,7 @@ def to_netcdf(self, path=None, mode='w', format=None, group=None, def __unicode__(self): return formatting.dataset_repr(self) - def attr_info(self, buf=None): + def info(self, buf=None): """ Concise summary of a Dataset variables and attributes. Parameters diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 752437f814c..01e3b8ff338 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -364,7 +364,10 @@ def array_repr(arr): def _put_lines(buf, lines): '''see also from pandas.formats.format import _put_lines''' if any(isinstance(x, basestring) for x in lines): - lines = [ensure_valid_repr(x) for x in lines] + if PY2: + lines = [unicode(x) for x in lines] + else: + lines = [basestring(x) for x in lines] buf.write('\n'.join(lines)) diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index ec8bb3064a7..3d88804407f 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -194,13 +194,14 @@ def test_unicode_data(self): actual = unicode_type(data) self.assertEqual(expected, actual) - def test_attr_info(self): - data = create_test_data(seed=123) - data.attrs['foo'] = 'bar' + def test_info(self): + ds = create_test_data(seed=123) + ds = ds.drop('dim3') # string type prints differently in PY2 vs PY3 + ds.attrs['foo'] = 'bar' buf = StringIO() - data.attr_info(buf=buf) + ds.info(buf=buf) - expected = dedent('''\ + expected = dedent(u'''\ xarray.Dataset { dimensions: dim1 = 8 ; @@ -211,7 +212,6 @@ def test_attr_info(self): variables: datetime64[ns] time(time) ; float64 dim2(dim2) ; - Date: Thu, 22 Dec 2016 20:18:24 -0800 Subject: [PATCH 4/6] cleanup after @shoyer's review --- setup.cfg | 2 +- xarray/core/dataset.py | 28 +++++++++++++++------------- xarray/core/formatting.py | 12 ++---------- xarray/core/pycompat.py | 2 ++ xarray/test/test_dataset.py | 15 +++++++-------- 5 files changed, 27 insertions(+), 32 deletions(-) diff --git a/setup.cfg b/setup.cfg index 6770e9c807f..44b0d881cc2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [wheel] universal = 1 -[pytest] +[tool:pytest] python_files=test_*.py diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index cd1e7294b0b..36d71998d82 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -27,10 +27,10 @@ from .variable import (Variable, as_variable, IndexVariable, broadcast_variables) from .pycompat import (iteritems, basestring, OrderedDict, dask_array_type, range) +from .formatting import ensure_valid_repr from .combine import concat from .options import OPTIONS - # list of attributes of pd.DatetimeIndex that are ndarrays of time info _DATETIMEINDEX_COMPONENTS = ['year', 'month', 'day', 'hour', 'minute', 'second', 'microsecond', 'nanosecond', 'date', @@ -807,6 +807,7 @@ def __unicode__(self): def info(self, buf=None): """ Concise summary of a Dataset variables and attributes. + Parameters ---------- buf : writable buffer, defaults to sys.stdout @@ -821,24 +822,25 @@ def info(self, buf=None): buf = sys.stdout lines = [] - lines.append('xarray.Dataset {') - lines.append('dimensions:') + lines.append(u'xarray.Dataset {') + lines.append(u'dimensions:') for name, size in self.dims.items(): - lines.append('\t{name} = {size} ;'.format(name=name, size=size)) - lines.append('\nvariables:') + lines.append(u'\t{name} = {size} ;'.format(name=name, size=size)) + lines.append(u'\nvariables:') for name, da in self.variables.items(): - dims = ', '.join(da.dims) - lines.append('\t{type} {name}({dims}) ;'.format( + dims = u', '.join(da.dims) + lines.append(u'\t{type} {name}({dims}) ;'.format( type=da.dtype, name=name, dims=dims)) for k, v in da.attrs.items(): - lines.append('\t\t{name}:{k} = {v} ;'.format(name=name, k=k, - v=v)) - lines.append('\n// global attributes:') + lines.append(u'\t\t{name}:{k} = {v} ;'.format(name=name, k=k, + v=v)) + lines.append(u'\n// global attributes:') for k, v in self.attrs.items(): - lines.append('\t:{k} = {v} ;'.format(k=k, v=v)) - lines.append('}') + lines.append(u'\t:{k} = {v} ;'.format(k=k, v=v)) + lines.append(u'}') - formatting._put_lines(buf, lines) + lines = [ensure_valid_repr(line) for line in lines] + buf.write('\n'.join(lines)) @property def chunks(self): diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 01e3b8ff338..91823fa6a67 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -87,6 +87,7 @@ def first_n_items(x, n_desired): x = x[indexer] return np.asarray(x).flat[:n_desired] + def last_item(x): """Returns the last item of an array""" if x.size == 0: @@ -96,6 +97,7 @@ def last_item(x): indexer = (slice(-1, None), ) * x.ndim return np.array(x[indexer], ndmin=1) + def format_timestamp(t): """Cast given object to a Timestamp and return a nicely formatted string""" # Timestamp is only valid for 1678 to 2262 @@ -361,16 +363,6 @@ def array_repr(arr): return u'\n'.join(summary) -def _put_lines(buf, lines): - '''see also from pandas.formats.format import _put_lines''' - if any(isinstance(x, basestring) for x in lines): - if PY2: - lines = [unicode(x) for x in lines] - else: - lines = [basestring(x) for x in lines] - buf.write('\n'.join(lines)) - - def dataset_repr(ds): summary = [u'' % type(ds).__name__] diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index 671f38c7df7..193ffca0340 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -22,6 +22,7 @@ def itervalues(d): from functools import reduce import builtins from urllib.request import urlretrieve + from io import StringIO else: # pragma: no cover # Python 2 basestring = basestring @@ -39,6 +40,7 @@ def itervalues(d): reduce = reduce import __builtin__ as builtins from urllib import urlretrieve + from cStringIO import StringIO try: from cyordereddict import OrderedDict diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index 3d88804407f..b8c65f3f8f5 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -12,10 +12,6 @@ import dask.array as da except ImportError: pass -try: - from io import StringIO -except ImportError: - from cStringIO import StringIO import numpy as np import pandas as pd @@ -26,7 +22,7 @@ Dataset, DataArray, Variable, IndexVariable, auto_combine, open_dataset, set_options, MergeError) from xarray.core import indexing, utils -from xarray.core.pycompat import iteritems, OrderedDict, unicode_type +from xarray.core.pycompat import iteritems, OrderedDict, unicode_type, StringIO from xarray.core.common import full_like from . import (TestCase, unittest, InaccessibleArray, UnexpectedDataAccess, @@ -197,11 +193,13 @@ def test_unicode_data(self): def test_info(self): ds = create_test_data(seed=123) ds = ds.drop('dim3') # string type prints differently in PY2 vs PY3 - ds.attrs['foo'] = 'bar' + ds.attrs['unicode_attr'] = u'ba®' + ds.attrs['string_attr'] = 'bar' + buf = StringIO() ds.info(buf=buf) - expected = dedent(u'''\ + expected = dedent('''\ xarray.Dataset { dimensions: dim1 = 8 ; @@ -221,7 +219,8 @@ def test_info(self): int64 numbers(dim3) ; // global attributes: - :foo = bar ; + :unicode_attr = ba® ; + :string_attr = bar ; }''') actual = buf.getvalue() self.assertEqual(expected, actual) From 2b566781ff0d01ce5aecc61a3081c81f83321144 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Thu, 22 Dec 2016 21:44:01 -0700 Subject: [PATCH 5/6] add tabs to example --- xarray/test/test_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index b8c65f3f8f5..e2ad56964b3 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -219,8 +219,8 @@ def test_info(self): int64 numbers(dim3) ; // global attributes: - :unicode_attr = ba® ; - :string_attr = bar ; + :unicode_attr = ba® ; + :string_attr = bar ; }''') actual = buf.getvalue() self.assertEqual(expected, actual) From 74df621815e32c8bad7a8b01d194d205a9d0b901 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Fri, 23 Dec 2016 10:23:44 -0700 Subject: [PATCH 6/6] fix unicode error on python2 --- .travis.yml | 2 +- xarray/core/dataset.py | 3 +-- xarray/core/pycompat.py | 2 -- xarray/test/test_dataset.py | 5 +++-- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index e887ec60467..348b929a75e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -69,7 +69,7 @@ install: - python setup.py install script: - - py.test xarray --cov=xarray --cov-report term-missing + - py.test xarray --cov=xarray --cov-report term-missing --verbose after_success: - coveralls diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 36d71998d82..4a12cf83a04 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -839,8 +839,7 @@ def info(self, buf=None): lines.append(u'\t:{k} = {v} ;'.format(k=k, v=v)) lines.append(u'}') - lines = [ensure_valid_repr(line) for line in lines] - buf.write('\n'.join(lines)) + buf.write(u'\n'.join(lines)) @property def chunks(self): diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index 193ffca0340..671f38c7df7 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -22,7 +22,6 @@ def itervalues(d): from functools import reduce import builtins from urllib.request import urlretrieve - from io import StringIO else: # pragma: no cover # Python 2 basestring = basestring @@ -40,7 +39,6 @@ def itervalues(d): reduce = reduce import __builtin__ as builtins from urllib import urlretrieve - from cStringIO import StringIO try: from cyordereddict import OrderedDict diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index e2ad56964b3..d5b77b70922 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -12,6 +12,7 @@ import dask.array as da except ImportError: pass +from io import StringIO import numpy as np import pandas as pd @@ -22,7 +23,7 @@ Dataset, DataArray, Variable, IndexVariable, auto_combine, open_dataset, set_options, MergeError) from xarray.core import indexing, utils -from xarray.core.pycompat import iteritems, OrderedDict, unicode_type, StringIO +from xarray.core.pycompat import iteritems, OrderedDict, unicode_type from xarray.core.common import full_like from . import (TestCase, unittest, InaccessibleArray, UnexpectedDataAccess, @@ -199,7 +200,7 @@ def test_info(self): buf = StringIO() ds.info(buf=buf) - expected = dedent('''\ + expected = dedent(u'''\ xarray.Dataset { dimensions: dim1 = 8 ;