pydata · shoyer · Aug 25, 2014 · Aug 21, 2014 · Aug 24, 2014 · Aug 24, 2014
diff --git a/.travis.yml b/.travis.yml
@@ -7,7 +7,7 @@ matrix:
   fast_finish: true
   include:
   - python: 2.6
-    env: UPDATE_ENV="conda install unittest2"
+    env: UPDATE_ENV="conda install unittest2 pandas==0.13.1"
   # Test on Python 2.7 with and without netCDF4/scipy
   - python: 2.7
     env: UPDATE_ENV=""

diff --git a/xray/backends/netCDF4_.py b/xray/backends/netCDF4_.py
@@ -37,14 +37,6 @@ def __getitem__(self, key):
         return data
 
 
-def _version_check(actual, required):
-    actual_tup = tuple(int(p) if p.isdigit() else p for p in actual.split('.'))
-    try:
-        return actual_tup >= required
-    except TypeError:
-        return True
-
-
 def _nc4_values_and_dtype(variable):
     if variable.dtype.kind in ['i', 'u', 'f'] or variable.dtype == 'S1':
         values = variable.values
@@ -96,11 +88,6 @@ class NetCDF4DataStore(AbstractWritableDataStore):
     def __init__(self, filename, mode='r', clobber=True, diskless=False,
                  persist=False, format='NETCDF4', group=None):
         import netCDF4 as nc4
-        if not _version_check(nc4.__version__, (1, 0, 6)):
-            warnings.warn('python-netCDF4 %s detected; '
-                          'the minimal recommended version is 1.0.6.'
-                          % nc4.__version__, ImportWarning)
-
         ds = nc4.Dataset(filename, mode=mode, clobber=clobber,
                          diskless=diskless, persist=persist,
                          format=format)

diff --git a/xray/core/common.py b/xray/core/common.py
@@ -117,8 +117,7 @@ def __contains__(self, key):
         return key in self._data.dims
 
     def __repr__(self):
-        return '\n'.join(formatting.wrap_indent(repr(v.to_index()), '%s: ' % k)
-                         for k, v in self.items())
+        # delegate to the shared formatter, which emits a 'Coordinates:' header
+        # followed by one summary line per coordinate
+        return formatting.coords_repr(self)
 
     @staticmethod
     def _convert_to_coord(key, value, expected_size=None):

diff --git a/xray/core/dataarray.py b/xray/core/dataarray.py
@@ -292,8 +292,9 @@ def values(self):
     def values(self, value):
         self.variable.values = value
 
+    @property
     def _in_memory(self):
-        return self.variable._in_memory()
+        # forwarded from the wrapped variable, where _in_memory is read as an
+        # attribute (no call)
+        return self.variable._in_memory
 
     @property
     def as_index(self):

diff --git a/xray/core/formatting.py b/xray/core/formatting.py
@@ -1,13 +1,10 @@
-from .pycompat import iteritems
+from datetime import datetime
+import itertools
 
+import numpy as np
+import pandas as pd
 
-def _summarize_attributes(data):
-    if data.attrs:
-        attr_summary = '\n'.join('    %s: %s' % (k, v) for k, v
-                                 in iteritems(data.attrs))
-    else:
-        attr_summary = '    Empty'
-    return attr_summary
+from .pycompat import iteritems, itervalues, unicode_type, bytes_type
 
 
 def wrap_indent(text, start='', length=None):
@@ -17,22 +14,115 @@ def wrap_indent(text, start='', length=None):
     return start + indent.join(x for x in text.splitlines())
 
 
+def _get_indexer_at_least_n_items(shape, n_desired):
+    """Build an indexer that selects at least the first n_desired items.
+
+    The result is a tuple with one entry per axis of ``shape``: integer 0
+    for the leading axes, a single ``slice(stop)`` on one axis, and full
+    slices on every axis after it. ``n_desired`` must lie between 1 and the
+    total number of items (checked with an assert).
+    """
+    assert 0 < n_desired <= np.prod(shape)
+    # trailing_sizes[k] = number of items spanned by the last k + 1 axes
+    trailing_sizes = np.cumprod(shape[::-1])
+    # how many trailing axes have to be taken in full
+    n_full_axes = np.argmax(trailing_sizes >= n_desired)
+    # items contributed by each step along the partially sliced axis
+    items_per_step = np.r_[1, trailing_sizes][n_full_axes]
+    stop = int(np.ceil(float(n_desired) / items_per_step))
+    leading = (0,) * (len(shape) - 1 - n_full_axes)
+    trailing = (slice(None),) * n_full_axes
+    return leading + (slice(stop),) + trailing
+
+
+def first_n_items(x, n_desired):
+    """Returns the first n_desired items of an array, in flattened order.
+
+    x is not assumed to be a numpy.ndarray, so x.flat[:n_desired] is not an
+    option. Access to elements of x could also be very expensive (e.g. if
+    it's only available over DAP), so everything that is needed is fetched
+    with a single call to __getitem__ before converting to a numpy array.
+
+    Raises ValueError if fewer than one item is requested.
+    """
+    if n_desired < 1:
+        raise ValueError('must request at least one item')
+    needs_subset = n_desired < x.size
+    if needs_subset:
+        x = x[_get_indexer_at_least_n_items(x.shape, n_desired)]
+    flat_values = np.asarray(x).flat
+    return flat_values[:n_desired]
+
+
+def format_item(x):
+    """Returns a succinct summary of an object as a string
+
+    Dates and times (``datetime`` or ``numpy.datetime64``) are rendered via
+    ``pandas.Timestamp``: only the date when the time of day is midnight,
+    otherwise date and time joined by ``'T'``. Missing timestamps give
+    ``'NaT'``. Text and bytes are shown with ``repr``, floating point
+    numbers with four significant digits, and anything else with ``str``.
+    """
+    if isinstance(x, (np.datetime64, datetime)):
+        timestamp = pd.Timestamp(x)
+        if pd.isnull(timestamp):
+            # str(NaT) has no separate date and time parts to split apart
+            return 'NaT'
+        date_str, time_str = str(timestamp).split()
+        if time_str == '00:00:00':
+            return date_str
+        return '%sT%s' % (date_str, time_str)
+    elif isinstance(x, (unicode_type, bytes_type)):
+        return repr(x)
+    elif isinstance(x, (float, np.floating)):
+        # np.floating covers every NumPy float width; the np.float alias was
+        # only the builtin float and no longer exists in recent NumPy
+        return '{0:.4}'.format(x)
+    else:
+        return str(x)
+
+
+def format_array_flat(items_ndarray, max_width):
+    """Return a formatted string for as many items in the flattened version of
+    items_ndarray that will fit within max_width characters
+
+    Items are separated by single spaces, and ' ...' is appended when some
+    items were left out and there is room for it. At least one item is
+    printed for a non-empty array, even if it is wider than max_width.
+    """
+    # every item will take up at least two characters, but always ask for at
+    # least one: first_n_items rejects requests for fewer, which would
+    # otherwise turn a non-positive max_width into a ValueError
+    max_possibly_relevant = max(int(np.ceil(max_width / 2.0)), 1)
+    relevant_items = first_n_items(items_ndarray, max_possibly_relevant)
+    pprint_items = list(map(format_item, relevant_items))
+
+    end_padding = ' ...'
+
+    # running width of the output, counting one separator per item
+    cum_len = np.cumsum([len(s) + 1 for s in pprint_items])
+    gt_max_width = cum_len > (max_width - len(end_padding))
+    if not gt_max_width.any():
+        num_to_print = len(pprint_items)
+    else:
+        # stop one short of the first item that overflows, but never print
+        # fewer than one item
+        num_to_print = max(np.argmax(gt_max_width) - 1, 1)
+
+    pprint_str = ' '.join(itertools.islice(pprint_items, int(num_to_print)))
+    remaining_chars = max_width - len(pprint_str) - len(end_padding)
+    if remaining_chars > 0 and num_to_print < items_ndarray.size:
+        pprint_str += end_padding
+    return pprint_str
+
+
+def summarize_var(name, var, first_col_width, max_width=100, show_values=True):
+    """Summarize a variable on one line: name, dimensions, dtype and values.
+
+    The name column is produced by pretty_print with first_col_width. When
+    show_values is false, '...' stands in for the values and the variable's
+    data is not formatted at all.
+    """
+    name_col = pretty_print('    %s ' % name, first_col_width)
+    if var.dims:
+        dims_part = '(%s) ' % ', '.join(map(str, var.dims))
+    else:
+        dims_part = ''
+    prefix = name_col + dims_part + ('%s ' % var.dtype)
+    if not show_values:
+        return prefix + '...'
+    # the values share the line with the prefix, so they only get what is left
+    return prefix + format_array_flat(var, max_width - len(prefix))
+
+
+def coords_repr(coords):
+    """Returns a 'Coordinates:' header followed by one summary line per
+    coordinate, with the name column sized to fit the longest name.
+    """
+    longest_name = 0
+    if coords:
+        longest_name = max(len(str(key)) for key in coords)
+    col_width = longest_name + 5
+    lines = ['Coordinates:']
+    for key, value in coords.items():
+        lines.append(summarize_var(key, value, col_width))
+    return '\n'.join(lines)
+
+
+def _summarize_attributes(data, indent='    '):
+    """Returns one indented 'key: value' line per entry of data.attrs, or an
+    indented 'Empty' when there are no attributes.
+    """
+    if not data.attrs:
+        return indent + 'Empty'
+    lines = ['%s%s: %s' % (indent, key, value)
+             for key, value in iteritems(data.attrs)]
+    return '\n'.join(lines)
+
+
 def array_repr(arr):
+    # used for DataArray, Variable and Coordinate
     if hasattr(arr, 'name') and arr.name is not None:
         name_str = '%r ' % arr.name
     else:
         name_str = ''
     dim_summary = ', '.join('%s: %s' % (k, v) for k, v
                             in zip(arr.dims, arr.shape))
     summary = ['<xray.%s %s(%s)>'% (type(arr).__name__, name_str, dim_summary)]
-    if arr.size < 1e5 or arr._in_memory():
+    if arr.size < 1e5 or arr._in_memory:
         summary.append(repr(arr.values))
     else:
         summary.append('[%s values with dtype=%s]' % (arr.size, arr.dtype))
-    if hasattr(arr, 'dataset'):
+    if hasattr(arr, 'coords'):
         if arr.coords:
-            summary.append('Coordinates:')
-            summary.append(wrap_indent(repr(arr.coords), '    '))
+            summary.append(repr(arr.coords))
         other_vars = [k for k in arr.dataset
                       if k not in arr.coords and k != arr.name]
         if other_vars:
@@ -54,45 +144,28 @@ def pretty_print(x, numchars):
         return s + ' ' * (numchars - len(s))
 
 
-def dataset_repr(ds):
+def dataset_repr(ds, preview_all_values=False):
+    """Returns a multi-line summary of a dataset: a header with the type
+    name, then its dimensions, coordinates, noncoordinates and attributes.
+
+    Values of coordinates are always previewed. Values of noncoordinate
+    variables are previewed only when the variable reports _in_memory,
+    unless preview_all_values is true.
+    """
     summary = ['<xray.%s>' % type(ds).__name__]
 
-    max_name_length = max(len(k) for k in ds.variables) if ds else 0
-    first_col_width = max(4 + max_name_length, 16)
+    max_name_length = max(len(str(k)) for k in ds.variables) if ds else 0
+    first_col_width = max(5 + max_name_length, 16)
     coords_str = pretty_print('Dimensions:', first_col_width)
     all_dim_strings = ['%s: %s' % (k, v) for k, v in iteritems(ds.dims)]
     summary.append('%s(%s)' % (coords_str, ', '.join(all_dim_strings)))
 
-    def summarize_var(k, not_found=' ', found=int):
-        v = ds.variables[k]
-        dim_strs = []
-        for n, d in enumerate(ds.dims):
-            length = len(all_dim_strings[n])
-            prepend = ' ' * (length // 2)
-            if d in v.dims:
-                if found is int:
-                    indicator = str(v.dims.index(d))
-                else:
-                    indicator = found
-            else:
-                indicator = not_found
-            dim_strs.append(pretty_print(prepend + indicator, length))
-        string = pretty_print('    ' + k, first_col_width) + ' '
-        string += '  '.join(dim_strs)
-        return string
-
-    def summarize_variables(variables, not_found=' ', found=int):
-        if variables:
-            return [summarize_var(k, not_found, found) for k in variables]
-        else:
-            return ['    None']
+    # one summary line per variable; an empty mapping yields the single
+    # placeholder line '    Empty' instead
+    def summarize_variables(variables, always_show_values):
+        return ([summarize_var(v.name, v, first_col_width,
+                               show_values=(always_show_values or v._in_memory))
+                 for v in itervalues(variables)]
+                or ['    Empty'])
 
     summary.append('Coordinates:')
-    summary.extend(summarize_variables(ds.coords, ' ', 'X'))
+    summary.extend(summarize_variables(ds.coords, always_show_values=True))
 
     summary.append('Noncoordinates:')
-    summary.extend(summarize_variables(ds.noncoords, ' ', int))
+    summary.extend(summarize_variables(
+        ds.noncoords, always_show_values=preview_all_values))
 
-    summary.append('Attributes:\n%s' % _summarize_attributes(ds))
+    summary.append('Attributes:\n%s' % _summarize_attributes(ds, '    '))
 
     return '\n'.join(summary)
diff --git a/xray/core/variable.py b/xray/core/variable.py
@@ -236,6 +236,7 @@ def ndim(self):
     def __len__(self):
         return len(self._data)
 
+    @property
     def _in_memory(self):
+        # True when _data is wrapped in one of these two adapter types;
+        # presumably those hold data that is already loaded -- TODO confirm
         return isinstance(self._data, (NumpyArrayAdapter, PandasIndexAdapter))
 

diff --git a/xray/test/test_backends.py b/xray/test/test_backends.py
@@ -80,11 +80,11 @@ def test_load_data(self):
         def assert_loads(vars=None):
             with self.roundtrip(expected) as actual:
                 for v in actual.variables.values():
-                    self.assertFalse(v._in_memory())
+                    self.assertFalse(v._in_memory)
                 yield actual
                 for k, v in actual.variables.items():
                     if vars is None or k in vars:
-                        self.assertTrue(v._in_memory())
+                        self.assertTrue(v._in_memory)
                 self.assertDatasetAllClose(expected, actual)
 
         with self.assertRaises(AssertionError):

diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py
@@ -21,18 +21,17 @@ def test_repr(self):
         v = Variable(['time', 'x'], [[1, 2, 3], [4, 5, 6]], {'foo': 'bar'})
         data_array = Dataset({'my_variable': v, 'other': ([], 0)}
                              )['my_variable']
-        expected = dedent("""
+        expected = dedent("""\
         <xray.DataArray 'my_variable' (time: 2, x: 3)>
         array([[1, 2, 3],
                [4, 5, 6]])
         Coordinates:
-            time: Int64Index([0, 1], dtype='int64')
-            x: Int64Index([0, 1, 2], dtype='int64')
+            time (time) int64 0 1
+            x    (x) int64 0 1 2
         Linked dataset variables:
             other
         Attributes:
-            foo: bar
-        """).strip()
+            foo: bar""")
         self.assertEqual(expected, repr(data_array))
 
     def test_properties(self):
@@ -310,8 +309,9 @@ def test_coords(self):
             da.coords['foo']
 
         expected = dedent("""\
-        x: Int64Index([-1, -2], dtype='int64')
-        y: Int64Index([0, 1, 2], dtype='int64')""")
+        Coordinates:
+            x (x) int64 -1 -2
+            y (y) int64 0 1 2""")
         actual = repr(da.coords)
         self.assertEquals(expected, actual)
 

diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py
@@ -70,36 +70,37 @@ def store_variables(self):
 
 class TestDataset(TestCase):
     def test_repr(self):
-        data = create_test_data()
-        expected = dedent("""
+        # NOTE(review): the fixed seed presumably keeps the values shown in the
+        # expected repr below reproducible -- confirm in create_test_data
+        data = create_test_data(seed=123)
+        # need to insert str dtype at runtime to handle both Python 2 & 3
+        expected = dedent("""\
         <xray.Dataset>
         Dimensions:     (dim1: 100, dim2: 50, dim3: 10, time: 20)
         Coordinates:
-            dim1             X
-            dim2                        X
-            dim3                                  X
-            time                                            X
+            dim1        (dim1) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 ...
+            dim2        (dim2) float64 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 ...
+            dim3        (dim3) %s 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j'
+            time        (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04 ...
         Noncoordinates:
-            var1             0          1
-            var2             0          1
-            var3             1                    0
+            var1        (dim1, dim2) float64 -1.086 0.9973 0.283 -1.506 -0.5786 1.651 -2.427 ...
+            var2        (dim1, dim2) float64 0.3188 1.511 -1.137 0.6425 -1.128 -0.5536 -0.9695 ...
+            var3        (dim3, dim1) float64 -1.241 -0.3129 -0.8489 2.378 0.6575 0.2131 -0.491 ...
         Attributes:
-            Empty
-        """).strip()
+            Empty""") % data['dim3'].dtype
+        # trailing whitespace is stripped line by line before comparing
         actual = '\n'.join(x.rstrip() for x in repr(data).split('\n'))
+        # NOTE(review): debug output; presumably kept so the actual repr is
+        # visible in the test log when the assertion below fails
+        print(actual)
         self.assertEqual(expected, actual)
 
-        expected = dedent("""
+        expected = dedent("""\
         <xray.Dataset>
         Dimensions:     ()
         Coordinates:
-            None
+            Empty
         Noncoordinates:
-            None
-        Attributes:
             Empty
-        """).strip()
+        Attributes:
+            Empty""")
         actual = '\n'.join(x.rstrip() for x in repr(Dataset()).split('\n'))
+        print(actual)
         self.assertEqual(expected, actual)
 
     def test_constructor(self):
@@ -178,8 +179,9 @@ def test_coords_properties(self):
             data.coords[0]
 
         expected = dedent("""\
-        x: Int64Index([-1, -2], dtype='int64')
-        y: Int64Index([0, 1, 2], dtype='int64')""")
+        Coordinates:
+            x (x) int64 -1 -2
+            y (y) int64 0 1 2""")
         actual = repr(data.coords)
         self.assertEquals(expected, actual)