Skip to content

Dataset.__repr__ shows array values #220

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 25, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ matrix:
fast_finish: true
include:
- python: 2.6
env: UPDATE_ENV="conda install unittest2"
env: UPDATE_ENV="conda install unittest2 pandas==0.13.1"
# Test on Python 2.7 with and without netCDF4/scipy
- python: 2.7
env: UPDATE_ENV=""
Expand Down
13 changes: 0 additions & 13 deletions xray/backends/netCDF4_.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,6 @@ def __getitem__(self, key):
return data


def _version_check(actual, required):
actual_tup = tuple(int(p) if p.isdigit() else p for p in actual.split('.'))
try:
return actual_tup >= required
except TypeError:
return True


def _nc4_values_and_dtype(variable):
if variable.dtype.kind in ['i', 'u', 'f'] or variable.dtype == 'S1':
values = variable.values
Expand Down Expand Up @@ -96,11 +88,6 @@ class NetCDF4DataStore(AbstractWritableDataStore):
def __init__(self, filename, mode='r', clobber=True, diskless=False,
persist=False, format='NETCDF4', group=None):
import netCDF4 as nc4
if not _version_check(nc4.__version__, (1, 0, 6)):
warnings.warn('python-netCDF4 %s detected; '
'the minimal recommended version is 1.0.6.'
% nc4.__version__, ImportWarning)

ds = nc4.Dataset(filename, mode=mode, clobber=clobber,
diskless=diskless, persist=persist,
format=format)
Expand Down
3 changes: 1 addition & 2 deletions xray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,7 @@ def __contains__(self, key):
return key in self._data.dims

def __repr__(self):
return '\n'.join(formatting.wrap_indent(repr(v.to_index()), '%s: ' % k)
for k, v in self.items())
return formatting.coords_repr(self)

@staticmethod
def _convert_to_coord(key, value, expected_size=None):
Expand Down
3 changes: 2 additions & 1 deletion xray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,8 +292,9 @@ def values(self):
def values(self, value):
self.variable.values = value

@property
def _in_memory(self):
return self.variable._in_memory()
return self.variable._in_memory

@property
def as_index(self):
Expand Down
155 changes: 114 additions & 41 deletions xray/core/formatting.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
from .pycompat import iteritems
from datetime import datetime
import itertools

import numpy as np
import pandas as pd

def _summarize_attributes(data):
if data.attrs:
attr_summary = '\n'.join(' %s: %s' % (k, v) for k, v
in iteritems(data.attrs))
else:
attr_summary = ' Empty'
return attr_summary
from .pycompat import iteritems, itervalues, unicode_type, bytes_type


def wrap_indent(text, start='', length=None):
Expand All @@ -17,22 +14,115 @@ def wrap_indent(text, start='', length=None):
return start + indent.join(x for x in text.splitlines())


def _get_indexer_at_least_n_items(shape, n_desired):
assert 0 < n_desired <= np.prod(shape)
cum_items = np.cumprod(shape[::-1])
n_steps = np.argmax(cum_items >= n_desired)
stop = int(np.ceil(float(n_desired) / np.r_[1, cum_items][n_steps]))
indexer = ((0,) * (len(shape) - 1 - n_steps) + (slice(stop),)
+ (slice(None),) * n_steps)
return indexer


def first_n_items(x, n_desired):
    """Returns the first n_desired items of an array

    Raises ValueError if fewer than one item is requested. When ``x`` holds
    no more than ``n_desired`` items, all of them are returned.
    """
    if n_desired < 1:
        raise ValueError('must request at least one item')
    # x may not be a numpy.ndarray, so x.flat[:n_desired] is not an option,
    # and reading elements of x can be very expensive (e.g. data served over
    # DAP). Pull everything needed with one __getitem__ call built only from
    # integers and slices before converting to a numpy array.
    if x.size > n_desired:
        x = x[_get_indexer_at_least_n_items(x.shape, n_desired)]
    flattened = np.asarray(x).flat
    return flattened[:n_desired]


def format_item(x):
    """Returns a succinct summary of an object as a string

    - ``numpy.datetime64`` and ``datetime`` values are shown as a date when
      the time of day is midnight (or absent, as for NaT), otherwise as
      ``<date>T<time>``.
    - text and bytes are shown with ``repr`` so that quotes stay visible.
    - floats, including NumPy floating scalars, are shown with four
      significant digits.
    - anything else falls back to ``str``.
    """
    if isinstance(x, (np.datetime64, datetime)):
        # partition instead of split: str(pd.NaT) is 'NaT' and has no time
        # component, which would break a two-name unpacking.
        date_str, _, time_str = str(pd.Timestamp(x)).partition(' ')
        if not time_str or time_str == '00:00:00':
            return date_str
        return '%sT%s' % (date_str, time_str)
    if isinstance(x, (unicode_type, bytes_type)):
        return repr(x)
    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; np.floating is the abstract base of NumPy float scalars.
    if isinstance(x, (float, np.floating)):
        return '{0:.4}'.format(x)
    return str(x)


def format_array_flat(items_ndarray, max_width):
    """Return a formatted string for as many items in the flattened version of
    items_ndarray that will fit within max_width characters

    Items are separated by single spaces, and ``' ...'`` is appended when
    not every item is shown and there is room left for it. At least one
    item is printed whenever the array is non-empty, even if that exceeds
    ``max_width``.
    """
    # every item will take up at least two characters (one for the value and
    # one for the separator). Always ask for at least one item so that a
    # zero or negative max_width (the label columns already fill the line)
    # does not make first_n_items raise ValueError.
    max_possibly_relevant = max(int(np.ceil(max_width / 2.0)), 1)
    relevant_items = first_n_items(items_ndarray, max_possibly_relevant)
    pprint_items = [format_item(item) for item in relevant_items]

    end_padding = ' ...'

    # running width of the output, counting one separator per item
    cum_len = np.cumsum([len(s) + 1 for s in pprint_items])
    gt_max_width = cum_len > (max_width - len(end_padding))
    if not gt_max_width.any():
        num_to_print = len(pprint_items)
    else:
        # stop before the first item that would overflow, but never print
        # fewer than one item
        num_to_print = max(np.argmax(gt_max_width) - 1, 1)

    pprint_str = ' '.join(pprint_items[:int(num_to_print)])
    remaining_chars = max_width - len(pprint_str) - len(end_padding)
    if remaining_chars > 0 and num_to_print < items_ndarray.size:
        pprint_str += end_padding
    return pprint_str


def summarize_var(name, var, first_col_width, max_width=100, show_values=True):
    """Build the one-line summary of a variable.

    The line consists of the name padded to ``first_col_width``, the
    dimension names in parentheses (omitted when ``var.dims`` is empty),
    the dtype, and then either a preview of the values sized to the space
    left within ``max_width`` or the placeholder ``'...'`` when
    ``show_values`` is false.
    """
    pieces = [pretty_print(' %s ' % name, first_col_width)]
    if var.dims:
        pieces.append('(%s) ' % ', '.join(str(dim) for dim in var.dims))
    pieces.append('%s ' % var.dtype)
    front_str = ''.join(pieces)
    if not show_values:
        return front_str + '...'
    return front_str + format_array_flat(var, max_width - len(front_str))


def coords_repr(coords):
    """Return the text block describing a mapping of coordinates.

    The block starts with a ``Coordinates:`` header, followed by one
    summary line per entry of ``coords``. The name column is sized to the
    longest name plus five characters.
    """
    longest_name = max(len(str(name)) for name in coords) if coords else 0
    col_width = longest_name + 5
    lines = ['Coordinates:']
    for name, var in coords.items():
        lines.append(summarize_var(name, var, col_width))
    return '\n'.join(lines)


def _summarize_attributes(data, indent=' '):
if data.attrs:
attr_summary = '\n'.join('%s%s: %s' % (indent, k, v) for k, v
in iteritems(data.attrs))
else:
attr_summary = indent + 'Empty'
return attr_summary


def array_repr(arr):
# used for DataArray, Variable and Coordinate
if hasattr(arr, 'name') and arr.name is not None:
name_str = '%r ' % arr.name
else:
name_str = ''
dim_summary = ', '.join('%s: %s' % (k, v) for k, v
in zip(arr.dims, arr.shape))
summary = ['<xray.%s %s(%s)>'% (type(arr).__name__, name_str, dim_summary)]
if arr.size < 1e5 or arr._in_memory():
if arr.size < 1e5 or arr._in_memory:
summary.append(repr(arr.values))
else:
summary.append('[%s values with dtype=%s]' % (arr.size, arr.dtype))
if hasattr(arr, 'dataset'):
if hasattr(arr, 'coords'):
if arr.coords:
summary.append('Coordinates:')
summary.append(wrap_indent(repr(arr.coords), ' '))
summary.append(repr(arr.coords))
other_vars = [k for k in arr.dataset
if k not in arr.coords and k != arr.name]
if other_vars:
Expand All @@ -54,45 +144,28 @@ def pretty_print(x, numchars):
return s + ' ' * (numchars - len(s))


def dataset_repr(ds):
def dataset_repr(ds, preview_all_values=False):
summary = ['<xray.%s>' % type(ds).__name__]

max_name_length = max(len(k) for k in ds.variables) if ds else 0
first_col_width = max(4 + max_name_length, 16)
max_name_length = max(len(str(k)) for k in ds.variables) if ds else 0
first_col_width = max(5 + max_name_length, 16)
coords_str = pretty_print('Dimensions:', first_col_width)
all_dim_strings = ['%s: %s' % (k, v) for k, v in iteritems(ds.dims)]
summary.append('%s(%s)' % (coords_str, ', '.join(all_dim_strings)))

def summarize_var(k, not_found=' ', found=int):
v = ds.variables[k]
dim_strs = []
for n, d in enumerate(ds.dims):
length = len(all_dim_strings[n])
prepend = ' ' * (length // 2)
if d in v.dims:
if found is int:
indicator = str(v.dims.index(d))
else:
indicator = found
else:
indicator = not_found
dim_strs.append(pretty_print(prepend + indicator, length))
string = pretty_print(' ' + k, first_col_width) + ' '
string += ' '.join(dim_strs)
return string

def summarize_variables(variables, not_found=' ', found=int):
if variables:
return [summarize_var(k, not_found, found) for k in variables]
else:
return [' None']
def summarize_variables(variables, always_show_values):
return ([summarize_var(v.name, v, first_col_width,
show_values=(always_show_values or v._in_memory))
for v in itervalues(variables)]
or [' Empty'])

summary.append('Coordinates:')
summary.extend(summarize_variables(ds.coords, ' ', 'X'))
summary.extend(summarize_variables(ds.coords, always_show_values=True))

summary.append('Noncoordinates:')
summary.extend(summarize_variables(ds.noncoords, ' ', int))
summary.extend(summarize_variables(
ds.noncoords, always_show_values=preview_all_values))

summary.append('Attributes:\n%s' % _summarize_attributes(ds))
summary.append('Attributes:\n%s' % _summarize_attributes(ds, ' '))

return '\n'.join(summary)
1 change: 1 addition & 0 deletions xray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ def ndim(self):
def __len__(self):
return len(self._data)

@property
def _in_memory(self):
    """True when ``self._data`` is one of the in-memory adapter types."""
    in_memory_adapters = (NumpyArrayAdapter, PandasIndexAdapter)
    return isinstance(self._data, in_memory_adapters)

Expand Down
4 changes: 2 additions & 2 deletions xray/test/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,11 @@ def test_load_data(self):
def assert_loads(vars=None):
with self.roundtrip(expected) as actual:
for v in actual.variables.values():
self.assertFalse(v._in_memory())
self.assertFalse(v._in_memory)
yield actual
for k, v in actual.variables.items():
if vars is None or k in vars:
self.assertTrue(v._in_memory())
self.assertTrue(v._in_memory)
self.assertDatasetAllClose(expected, actual)

with self.assertRaises(AssertionError):
Expand Down
14 changes: 7 additions & 7 deletions xray/test/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,17 @@ def test_repr(self):
v = Variable(['time', 'x'], [[1, 2, 3], [4, 5, 6]], {'foo': 'bar'})
data_array = Dataset({'my_variable': v, 'other': ([], 0)}
)['my_variable']
expected = dedent("""
expected = dedent("""\
<xray.DataArray 'my_variable' (time: 2, x: 3)>
array([[1, 2, 3],
[4, 5, 6]])
Coordinates:
time: Int64Index([0, 1], dtype='int64')
x: Int64Index([0, 1, 2], dtype='int64')
time (time) int64 0 1
x (x) int64 0 1 2
Linked dataset variables:
other
Attributes:
foo: bar
""").strip()
foo: bar""")
self.assertEqual(expected, repr(data_array))

def test_properties(self):
Expand Down Expand Up @@ -310,8 +309,9 @@ def test_coords(self):
da.coords['foo']

expected = dedent("""\
x: Int64Index([-1, -2], dtype='int64')
y: Int64Index([0, 1, 2], dtype='int64')""")
Coordinates:
x (x) int64 -1 -2
y (y) int64 0 1 2""")
actual = repr(da.coords)
self.assertEquals(expected, actual)

Expand Down
38 changes: 20 additions & 18 deletions xray/test/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,36 +70,37 @@ def store_variables(self):

class TestDataset(TestCase):
def test_repr(self):
data = create_test_data()
expected = dedent("""
data = create_test_data(seed=123)
# need to insert str dtype at runtime to handle both Python 2 & 3
expected = dedent("""\
<xray.Dataset>
Dimensions: (dim1: 100, dim2: 50, dim3: 10, time: 20)
Coordinates:
dim1 X
dim2 X
dim3 X
time X
dim1 (dim1) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 ...
dim2 (dim2) float64 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 ...
dim3 (dim3) %s 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j'
time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04 ...
Noncoordinates:
var1 0 1
var2 0 1
var3 1 0
var1 (dim1, dim2) float64 -1.086 0.9973 0.283 -1.506 -0.5786 1.651 -2.427 ...
var2 (dim1, dim2) float64 0.3188 1.511 -1.137 0.6425 -1.128 -0.5536 -0.9695 ...
var3 (dim3, dim1) float64 -1.241 -0.3129 -0.8489 2.378 0.6575 0.2131 -0.491 ...
Attributes:
Empty
""").strip()
Empty""") % data['dim3'].dtype
actual = '\n'.join(x.rstrip() for x in repr(data).split('\n'))
print(actual)
self.assertEqual(expected, actual)

expected = dedent("""
expected = dedent("""\
<xray.Dataset>
Dimensions: ()
Coordinates:
None
Empty
Noncoordinates:
None
Attributes:
Empty
""").strip()
Attributes:
Empty""")
actual = '\n'.join(x.rstrip() for x in repr(Dataset()).split('\n'))
print(actual)
self.assertEqual(expected, actual)

def test_constructor(self):
Expand Down Expand Up @@ -178,8 +179,9 @@ def test_coords_properties(self):
data.coords[0]

expected = dedent("""\
x: Int64Index([-1, -2], dtype='int64')
y: Int64Index([0, 1, 2], dtype='int64')""")
Coordinates:
x (x) int64 -1 -2
y (y) int64 0 1 2""")
actual = repr(data.coords)
self.assertEquals(expected, actual)

Expand Down
Loading