From b30374495b96e1cecb7349f4cd5e2c39cf3e91a1 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 12 May 2014 14:25:50 -0700 Subject: [PATCH 01/17] convert array of netCDF4.datetime objects to numpy.datetime64 array to support virtual_variable indexing, #121 --- test/test_conventions.py | 13 ++++++++----- xray/conventions.py | 20 ++++++++++++-------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/test/test_conventions.py b/test/test_conventions.py index 9a62d2e3302..b65dc41cc45 100644 --- a/test/test_conventions.py +++ b/test/test_conventions.py @@ -125,12 +125,15 @@ def test_decoded_cf_datetime_array(self): self.assertEqual(actual.dtype, np.dtype('datetime64[ns]')) self.assertArrayEqual(actual, expected) - num_dates = [722000, 720000.5] - units = 'days since 0001-01-01 0:0:0' calendar = 'noleap' - actual = conventions.DecodedCFDatetimeArray(num_dates, units, calendar) - expected = nc4.num2date(num_dates, units, calendar) - self.assertEqual(actual.dtype, np.dtype('O')) + units = 'days since 0001-01-01' + times = pd.date_range('2001-01-01-00', end='2001-07-31-23', freq='H') + noleap_time = nc4.date2num(times.to_pydatetime(), units, + calendar=calendar) + expected = times.values + actual = conventions.decode_cf_datetime(noleap_time, units, + calendar=calendar) + self.assertEqual(actual.dtype, np.dtype('M8[ns]')) self.assertArrayEqual(actual, expected) @requires_netCDF4 diff --git a/xray/conventions.py b/xray/conventions.py index 913d3317f3e..fdb7fcb393c 100644 --- a/xray/conventions.py +++ b/xray/conventions.py @@ -88,7 +88,8 @@ def nan_safe_num2date(num): if ((calendar not in _STANDARD_CALENDARS or min_date.year < 1678 or max_date.year >= 2262) and min_date is not pd.NaT): - dates = nc4.num2date(num_dates, units, calendar) + ncdates = nc4.num2date(num_dates, units, calendar) + dates = nctime_to_nptime(ncdates) else: # we can safely use np.datetime64 with nanosecond precision (pandas # likes ns precision so it can directly make DatetimeIndex 
objects) @@ -144,6 +145,15 @@ def guess_time_units(dates): return '%s since %s' % (time_unit, dates[0]) +def nctime_to_nptime(times): + """Given an array of netCDF4.datetime objects, return an array of + numpy.datetime64 objects""" + new = np.empty(len(times), dtype='M8[ns]') + for i, t in enumerate(times): + new[i] = np.datetime64(datetime(*t.timetuple()[:6])) + return new + + def encode_cf_datetime(dates, units=None, calendar=None): """Given an array of datetime objects, returns the tuple `(num, units, calendar)` suitable for a CF complient time variable. @@ -246,13 +256,7 @@ def __init__(self, array, units, calendar=None): @property def dtype(self): - if self.calendar is None or self.calendar in _STANDARD_CALENDARS: - # TODO: return the proper dtype (object) for a standard calendar - # that can't be expressed in ns precision. Perhaps we could guess - # this from the units? - return np.dtype('datetime64[ns]') - else: - return np.dtype('O') + return np.dtype('datetime64[ns]') def __getitem__(self, key): return decode_cf_datetime(self.array, units=self.units, From 9dfe284a4df7c8fbe972ed19c6b02b137e887f7a Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 12 May 2014 16:50:26 -0700 Subject: [PATCH 02/17] restrict the conversion of netCDF4.datetime objects to numpy.datetime64 object to valid years (1677= 2262) and min_date is not pd.NaT): - ncdates = nc4.num2date(num_dates, units, calendar) - dates = nctime_to_nptime(ncdates) + + dates = nc4.num2date(num_dates, units, calendar) + + if min_date.year >= 1678 and max_date.year < 2262: + dates = nctime_to_nptime(dates) else: # we can safely use np.datetime64 with nanosecond precision (pandas # likes ns precision so it can directly make DatetimeIndex objects) From edcc854f0c99282f5d9eacf46aa4c7cde4ede18f Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 12 May 2014 20:54:54 -0700 Subject: [PATCH 03/17] Handle arbitrary shape time arrays in nctime_to_nptime. 
Also added the ability to fallback to netCDF4.datetime objects since this solution does not fix the problem for all calendars in all situations. --- test/test_conventions.py | 57 ++++++++++++++++++++++++++++++++++------ xray/conventions.py | 20 ++++++++++---- 2 files changed, 64 insertions(+), 13 deletions(-) diff --git a/test/test_conventions.py b/test/test_conventions.py index b65dc41cc45..1d020a9d6fb 100644 --- a/test/test_conventions.py +++ b/test/test_conventions.py @@ -111,8 +111,6 @@ def test_cf_datetime(self): @requires_netCDF4 def test_decoded_cf_datetime_array(self): - import netCDF4 as nc4 - actual = conventions.DecodedCFDatetimeArray( [0, 1, 2], 'days since 1900-01-01', 'standard') expected = pd.date_range('1900-01-01', periods=3).values @@ -125,16 +123,59 @@ def test_decoded_cf_datetime_array(self): self.assertEqual(actual.dtype, np.dtype('datetime64[ns]')) self.assertArrayEqual(actual, expected) + @requires_netCDF4 + def test_decode_non_standard_calendar(self): + import netCDF4 as nc4 + + for calendar in ['noleap', '365_day', '360_day', 'julian', 'all_leap', + '366_day']: + units = 'days since 0001-01-01' + times = pd.date_range('2001-04-01-00', end='2001-04-30-23', + freq='H') + noleap_time = nc4.date2num(times.to_pydatetime(), units, + calendar=calendar) + expected = times.values + actual = conventions.decode_cf_datetime(noleap_time, units, + calendar=calendar) + self.assertEqual(actual.dtype, np.dtype('M8[ns]')) + self.assertArrayEqual(actual, expected) + + @requires_netCDF4 + def test_decode_non_standard_calendar_multidim_time(self): + import netCDF4 as nc4 + calendar = 'noleap' units = 'days since 0001-01-01' - times = pd.date_range('2001-01-01-00', end='2001-07-31-23', freq='H') - noleap_time = nc4.date2num(times.to_pydatetime(), units, - calendar=calendar) - expected = times.values - actual = conventions.decode_cf_datetime(noleap_time, units, + times1 = pd.date_range('2001-04-01', end='2001-04-05', freq='D') + times2 = 
pd.date_range('2001-05-01', end='2001-05-05', freq='D') + noleap_time1 = nc4.date2num(times1.to_pydatetime(), units, + calendar=calendar) + noleap_time2 = nc4.date2num(times2.to_pydatetime(), units, + calendar=calendar) + mdim_time = np.empty((len(noleap_time1), 2), ) + mdim_time[:, 0] = noleap_time1 + mdim_time[:, 1] = noleap_time2 + + expected1 = times1.values + expected2 = times2.values + actual = conventions.decode_cf_datetime(mdim_time, units, calendar=calendar) self.assertEqual(actual.dtype, np.dtype('M8[ns]')) - self.assertArrayEqual(actual, expected) + self.assertArrayEqual(actual[:, 0], expected1) + self.assertArrayEqual(actual[:, 1], expected2) + + @requires_netCDF4 + def test_decode_non_calendar_fallback(self): + import netCDF4 as nc4 + for year in [2010, 2011, 2012, 2013, 2014]: + calendar = '360_day' + units = 'days since {0}-01-01'.format(year) + num_times = np.arange(100) + expected = nc4.num2date(num_times, units, calendar) + actual = conventions.decode_cf_datetime(num_times, units, + calendar=calendar) + self.assertEqual(actual.dtype, np.dtype('O')) + self.assertArrayEqual(actual, expected) @requires_netCDF4 def test_cf_datetime_nan(self): diff --git a/xray/conventions.py b/xray/conventions.py index 900f8026b59..39b1df5f321 100644 --- a/xray/conventions.py +++ b/xray/conventions.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd +import warnings from collections import defaultdict, OrderedDict from datetime import datetime @@ -90,9 +91,17 @@ def nan_safe_num2date(num): and min_date is not pd.NaT): dates = nc4.num2date(num_dates, units, calendar) - + if min_date.year >= 1678 and max_date.year < 2262: - dates = nctime_to_nptime(dates) + try: + dates = nctime_to_nptime(dates) + except ValueError as e: + warnings.warn(str(e)) + warnings.warn('Unable to decode time axis into full ' + 'numpy.datetime64 objects, continuing using ' + 'dummy netCDF4.datetime objects instead', + RuntimeWarning, stacklevel=2) + pass else: # we can safely use 
np.datetime64 with nanosecond precision (pandas # likes ns precision so it can directly make DatetimeIndex objects) @@ -126,6 +135,7 @@ def nan_safe_num2date(num): + np.datetime64(min_date)) # restore original shape and ensure dates are given in ns dates = dates.reshape(num_dates.shape).astype('M8[ns]') + return dates @@ -150,9 +160,9 @@ def guess_time_units(dates): def nctime_to_nptime(times): """Given an array of netCDF4.datetime objects, return an array of - numpy.datetime64 objects""" - new = np.empty(len(times), dtype='M8[ns]') - for i, t in enumerate(times): + numpy.datetime64 objects of the same size""" + new = np.empty(times.shape, dtype='M8[ns]') + for i, t in np.ndenumerate(times): new[i] = np.datetime64(datetime(*t.timetuple()[:6])) return new From 0d6a745897c21551d32205439bda47dfc306c049 Mon Sep 17 00:00:00 2001 From: Alistair Miles Date: Tue, 13 May 2014 14:09:07 +0100 Subject: [PATCH 04/17] initial implementation of support for NetCDF groups --- doc/tutorial.rst | 6 ++++++ test/test_backends.py | 39 +++++++++++++++++++++++++++++++++++++ xray/backends/netCDF4_.py | 41 +++++++++++++++++++++++++++++++++++---- 3 files changed, 82 insertions(+), 4 deletions(-) diff --git a/doc/tutorial.rst b/doc/tutorial.rst index d8339cef927..bfec4676251 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -807,6 +807,12 @@ We can load NetCDF files to create a new Dataset using the Attributes: title: example attribute +A dataset can also be loaded from a specific group within a NetCDF +file. To load from a group, pass a ``group`` keyword argument to the +``open_dataset`` function. The group can be specified as a path-like +string, e.g., to access subgroup 'bar' within group 'foo' pass +'/foo/bar' as the ``group`` argument. + Data is loaded lazily from NetCDF files. You can manipulate, slice and subset Dataset and DataArray objects, and no array values are loaded into memory until necessary. 
For an example of how these lazy arrays work, since the OpenDAP diff --git a/test/test_backends.py b/test/test_backends.py index 8cba7a81a3f..45f22642a96 100644 --- a/test/test_backends.py +++ b/test/test_backends.py @@ -164,6 +164,45 @@ def test_open_encodings(self): if k in expected['time'].encoding} self.assertDictEqual(actual_encoding, expected['time'].encoding) + def test_open_group(self): + # Create a netCDF file with a dataset stored within a group + with create_tmp_file() as tmp_file: + rootgrp = nc4.Dataset(tmp_file, 'w') + foogrp = rootgrp.createGroup('foo') + ds = foogrp + ds.createDimension('time', size=10) + x = np.arange(10) + ds.createVariable('x', np.int32, dimensions=('time',)) + ds.variables['x'][:] = x + rootgrp.close() + + expected = Dataset() + expected['x'] = ('time', x) + + for group in 'foo', '/foo', '/foo/': # equivalent ways to specify group + actual = open_dataset(tmp_file, group=group) + self.assertVariableEqual(actual['x'], expected['x']) + + def test_open_subgroup(self): + # Create a netCDF file with a dataset stored within a group within a group + with create_tmp_file() as tmp_file: + rootgrp = nc4.Dataset(tmp_file, 'w') + foogrp = rootgrp.createGroup('foo') + bargrp = foogrp.createGroup('bar') + ds = bargrp + ds.createDimension('time', size=10) + x = np.arange(10) + ds.createVariable('x', np.int32, dimensions=('time',)) + ds.variables['x'][:] = x + rootgrp.close() + + expected = Dataset() + expected['x'] = ('time', x) + + for group in 'foo/bar', '/foo/bar', '/foo/bar/': # equivalent ways to specify group + actual = open_dataset(tmp_file, group=group) + self.assertVariableEqual(actual['x'], expected['x']) + def test_dump_and_open_encodings(self): # Create a netCDF file with explicit time units # and make sure it makes it into the encodings diff --git a/xray/backends/netCDF4_.py b/xray/backends/netCDF4_.py index f01b82dc6ad..913582faf42 100644 --- a/xray/backends/netCDF4_.py +++ b/xray/backends/netCDF4_.py @@ -62,22 +62,55 @@ def 
_nc4_values_and_dtype(variable): return values, dtype +def _nc4_group(ds, group): + if group in {None, '', '/'}: + # use the root group + return ds + else: + # make sure it's a string (maybe should raise an error if not?) + group = str(group) + # support path-like syntax + path = group.strip('/').split('/') + # find the specified group by recursive search + return _nc4_group_from_path(ds, path, set([ds])) + + +def _nc4_group_from_path(parent, path, visited): + key = path.pop(0) + if key not in parent.groups: + # TODO more specific exception type? + raise Exception('group not found: %r, %s' % (parent, key)) + else: + parent = parent.groups[key] + if parent in visited: + # TODO more specific exception type? + raise Exception('encountered circular group structure') + elif len(path) > 0: + # recurse + visited.add(parent) + return _nc4_group_from_path(parent, path, visited) + else: + return parent + + class NetCDF4DataStore(AbstractWritableDataStore): """Store for reading and writing data via the Python-NetCDF4 library. This store supports NetCDF3, NetCDF4 and OpenDAP datasets. """ def __init__(self, filename, mode='r', clobber=True, diskless=False, - persist=False, format='NETCDF4'): + persist=False, format='NETCDF4', group=None): import netCDF4 as nc4 if not _version_check(nc4.__version__, (1, 0, 6)): warnings.warn('python-netCDF4 %s detected; ' 'the minimal recommended version is 1.0.6.' 
% nc4.__version__, ImportWarning) - self.ds = nc4.Dataset(filename, mode=mode, clobber=clobber, - diskless=diskless, persist=persist, - format=format) + ds = nc4.Dataset(filename, mode=mode, clobber=clobber, + diskless=diskless, persist=persist, + format=format) + # support use of groups + self.ds = _nc4_group(ds, group) self.format = format def open_store_variable(self, var): From e41048c3ed3cfb43a6e35efbc1c041814cedae72 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 13 May 2014 10:19:41 -0700 Subject: [PATCH 05/17] cleanup fallback code a bit --- xray/conventions.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/xray/conventions.py b/xray/conventions.py index 39b1df5f321..dcf8e67b7e5 100644 --- a/xray/conventions.py +++ b/xray/conventions.py @@ -96,12 +96,10 @@ def nan_safe_num2date(num): try: dates = nctime_to_nptime(dates) except ValueError as e: - warnings.warn(str(e)) warnings.warn('Unable to decode time axis into full ' 'numpy.datetime64 objects, continuing using ' - 'dummy netCDF4.datetime objects instead', - RuntimeWarning, stacklevel=2) - pass + 'dummy netCDF4.datetime objects instead, reason:' + '{0}'.format(e), RuntimeWarning, stacklevel=2) else: # we can safely use np.datetime64 with nanosecond precision (pandas # likes ns precision so it can directly make DatetimeIndex objects) From c406d2b69938d127baa6d8e173afebcbd586c210 Mon Sep 17 00:00:00 2001 From: Alistair Miles Date: Wed, 14 May 2014 00:07:40 +0100 Subject: [PATCH 06/17] use IOError; safer recursion; don't worry about group cycles --- xray/backends/netCDF4_.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/xray/backends/netCDF4_.py b/xray/backends/netCDF4_.py index 913582faf42..4ac0ed67448 100644 --- a/xray/backends/netCDF4_.py +++ b/xray/backends/netCDF4_.py @@ -72,23 +72,19 @@ def _nc4_group(ds, group): # support path-like syntax path = group.strip('/').split('/') # find the specified group by recursive search - return 
_nc4_group_from_path(ds, path, set([ds])) + return _nc4_group_from_path(ds, path) -def _nc4_group_from_path(parent, path, visited): - key = path.pop(0) +def _nc4_group_from_path(parent, path): + key = path[0] + path = path[1:] if key not in parent.groups: - # TODO more specific exception type? - raise Exception('group not found: %r, %s' % (parent, key)) + raise IOError('group not found: %r, %s' % (parent, key)) else: parent = parent.groups[key] - if parent in visited: - # TODO more specific exception type? - raise Exception('encountered circular group structure') - elif len(path) > 0: + if len(path) > 0: # recurse - visited.add(parent) - return _nc4_group_from_path(parent, path, visited) + return _nc4_group_from_path(parent, path) else: return parent From ec778a755bd386c65fd0b4eab7061b6a654d74a4 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 13 May 2014 18:15:48 -0700 Subject: [PATCH 07/17] Require only numpy 1.7 for the benefit of readthedocs ReadTheDocs comes with pre-built packages for the basic scientific python stack, but some of these packages are old (e.g., numpy is 1.7.1). The only way to upgrade packages on readthedocs is to use a virtual environment and a requirements.txt. Unfortunately, this means we can't upgrade both numpy and pandas simultaneously, because pandas may get built first and link against the wrong version of numpy. We inadvertently stumbled upon a workaround to build the "latest" docs by first installing numpy in the (cached) virtual environment, and then later (in another commit), adding pandas to the requirements.txt file. However, this is a real hack and makes it impossible to maintain different versions of the docs, such as for tagged releases. Accordingly, this commit relaxes the numpy version requirement so we can use a version that readthedocs already has installed. 
(We actually don't really need a newer version of numpy for any current functionality in xray, although it's nice to have for support for missing value functions like nanmean.) --- doc/conf.py | 7 ++++++- doc/requirements.txt | 5 +++-- doc/tutorial.rst | 11 +++++++++++ setup.py | 4 ++-- 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 4d94e81f46f..564120c059b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -22,6 +22,11 @@ print "numpy: %s, %s" % (numpy.__version__, numpy.__file__) except ImportError: print "no numpy" +try: + import scipy + print "scipy: %s, %s" % (scipy.__version__, scipy.__file__) +except ImportError: + print "no scipy" try: import pandas print "pandas: %s, %s" % (pandas.__version__, pandas.__file__) @@ -68,7 +73,7 @@ def __getattr__(cls, name): else: return Mock() -MOCK_MODULES = ['netCDF4', 'scipy', 'scipy.io'] +MOCK_MODULES = ['netCDF4'] for mod_name in MOCK_MODULES: sys.modules[mod_name] = Mock() diff --git a/doc/requirements.txt b/doc/requirements.txt index 852a47407eb..edd1774fab1 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,8 +1,9 @@ # only the dependencies required to build xray's docs # all others (netCDF4, scipy) are mocked out in conf.py -numpy==1.8.1 +numpy>=1.7 ipython==2.0.0 pandas==0.13.1 -six python-dateutil +scipy +six matplotlib diff --git a/doc/tutorial.rst b/doc/tutorial.rst index bfec4676251..93bb26d1432 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -391,6 +391,17 @@ skip missing values, but we expect to switch to NA skipping versions (like pandas) in the future. For now, you can do NA skipping aggregate by passing NA aware numpy functions to the :py:attr:`~xray.DataArray.reduce` method: +.. ipython:: python + :suppress: + + # monkey patch numpy with nanmean from scipy.stats so the docs can build + # even with numpy 1.7 (np.nanmean first appears in numpy 1.8). 
+ # this is to work around an unfortunate limitation of readthedocs/pip which + # stops us from upgrading both numpy and pandas. + + from scipy import stats + np.nanmean = stats.nanmean + .. ipython:: python foo.reduce(np.nanmean, 'time') diff --git a/setup.py b/setup.py index 31475e9325d..4620f258c8c 100644 --- a/setup.py +++ b/setup.py @@ -153,8 +153,8 @@ def write_version_py(filename=None): classifiers=CLASSIFIERS, description=DESCRIPTION, long_description=LONG_DESCRIPTION, - install_requires=['numpy >= 1.8', 'pandas >= 0.13.1'], - tests_require=['mock >= 1.0.1', 'nose >= 1.0'], + install_requires=['numpy >= 1.7', 'pandas >= 0.13.1'], + tests_require=['nose >= 1.0'], url=URL, test_suite='nose.collector', packages=['xray', 'xray.backends']) From 1c6b0323a3232d5a4744c9613ce5183be5814a19 Mon Sep 17 00:00:00 2001 From: Alistair Miles Date: Wed, 14 May 2014 10:16:51 +0100 Subject: [PATCH 08/17] simplify group access to avoid recursion; test missing group error --- .gitignore | 3 +++ test/test_backends.py | 6 ++++++ xray/backends/netCDF4_.py | 30 +++++++++++------------------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 301a2745dd5..5c75dccd9ac 100644 --- a/.gitignore +++ b/.gitignore @@ -34,6 +34,9 @@ nosetests.xml .project .pydevproject +# PyCharm +.idea + # xray specific doc/_build doc/generated diff --git a/test/test_backends.py b/test/test_backends.py index 45f22642a96..8d62ab867cc 100644 --- a/test/test_backends.py +++ b/test/test_backends.py @@ -183,6 +183,12 @@ def test_open_group(self): actual = open_dataset(tmp_file, group=group) self.assertVariableEqual(actual['x'], expected['x']) + # check that missing group raises appropriate exception + try: + open_dataset(tmp_file, group='bar') + except IOError: + pass # expected + def test_open_subgroup(self): # Create a netCDF file with a dataset stored within a group within a group with create_tmp_file() as tmp_file: diff --git a/xray/backends/netCDF4_.py 
b/xray/backends/netCDF4_.py index 4ac0ed67448..d66864e6409 100644 --- a/xray/backends/netCDF4_.py +++ b/xray/backends/netCDF4_.py @@ -63,30 +63,22 @@ def _nc4_values_and_dtype(variable): def _nc4_group(ds, group): - if group in {None, '', '/'}: + if group in set([None, '', '/']): # use the root group return ds else: - # make sure it's a string (maybe should raise an error if not?) - group = str(group) + # make sure it's a string + if not isinstance(group, basestring): + raise ValueError('group must be a string or None') # support path-like syntax path = group.strip('/').split('/') - # find the specified group by recursive search - return _nc4_group_from_path(ds, path) - - -def _nc4_group_from_path(parent, path): - key = path[0] - path = path[1:] - if key not in parent.groups: - raise IOError('group not found: %r, %s' % (parent, key)) - else: - parent = parent.groups[key] - if len(path) > 0: - # recurse - return _nc4_group_from_path(parent, path) - else: - return parent + for key in path: + try: + ds = ds.groups[key] + except KeyError as e: + # wrap error to provide slightly more helpful message + raise IOError('group not found: %s' % key, e) + return ds class NetCDF4DataStore(AbstractWritableDataStore): From 8d95d3820e36f2ddf88f54c6b49fe61424e9a3a5 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Wed, 14 May 2014 17:33:17 -0700 Subject: [PATCH 09/17] Add further testing for single element time ordinals. Expanded warnings for when non numpy.datetime64 arrays are returned. Use context manager for warnings in test_conventions.py. 
--- test/test_conventions.py | 35 +++++++++++++++++++++++++++++------ xray/conventions.py | 6 ++++++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/test/test_conventions.py b/test/test_conventions.py index 1d020a9d6fb..6daf1ff5fb3 100644 --- a/test/test_conventions.py +++ b/test/test_conventions.py @@ -135,11 +135,29 @@ def test_decode_non_standard_calendar(self): noleap_time = nc4.date2num(times.to_pydatetime(), units, calendar=calendar) expected = times.values - actual = conventions.decode_cf_datetime(noleap_time, units, - calendar=calendar) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'All-NaN') + actual = conventions.decode_cf_datetime(noleap_time, units, + calendar=calendar) self.assertEqual(actual.dtype, np.dtype('M8[ns]')) self.assertArrayEqual(actual, expected) + @requires_netCDF4 + def test_decode_non_standard_calendar_single_element(self): + + for calendar in ['noleap', '365_day', '360_day', 'julian', 'all_leap', + '366_day']: + units = 'days since 0001-01-01' + + for num_time in [735368, [735368], [[735368]]]: + + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'All-NaN') + actual = conventions.decode_cf_datetime(num_time, units, + calendar=calendar) + self.assertTrue(actual.dtype in [np.dtype('M8[ns]'), + np.dtype('O')]) + @requires_netCDF4 def test_decode_non_standard_calendar_multidim_time(self): import netCDF4 as nc4 @@ -158,8 +176,10 @@ def test_decode_non_standard_calendar_multidim_time(self): expected1 = times1.values expected2 = times2.values - actual = conventions.decode_cf_datetime(mdim_time, units, - calendar=calendar) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'All-NaN') + actual = conventions.decode_cf_datetime(mdim_time, units, + calendar=calendar) self.assertEqual(actual.dtype, np.dtype('M8[ns]')) self.assertArrayEqual(actual[:, 0], expected1) self.assertArrayEqual(actual[:, 1], expected2) @@ -172,8 +192,11 @@ def 
test_decode_non_calendar_fallback(self): units = 'days since {0}-01-01'.format(year) num_times = np.arange(100) expected = nc4.num2date(num_times, units, calendar) - actual = conventions.decode_cf_datetime(num_times, units, - calendar=calendar) + + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'All-NaN') + actual = conventions.decode_cf_datetime(num_times, units, + calendar=calendar) self.assertEqual(actual.dtype, np.dtype('O')) self.assertArrayEqual(actual, expected) diff --git a/xray/conventions.py b/xray/conventions.py index dcf8e67b7e5..4d12a989aa8 100644 --- a/xray/conventions.py +++ b/xray/conventions.py @@ -100,6 +100,11 @@ def nan_safe_num2date(num): 'numpy.datetime64 objects, continuing using ' 'dummy netCDF4.datetime objects instead, reason:' '{0}'.format(e), RuntimeWarning, stacklevel=2) + else: + warnings.warn('Unable to decode time axis into full ' + 'numpy.datetime64 objects, continuing using dummy ' + 'netCDF4.datetime objects instead, reason: dates out' + ' of range', RuntimeWarning, stacklevel=2) else: # we can safely use np.datetime64 with nanosecond precision (pandas # likes ns precision so it can directly make DatetimeIndex objects) @@ -159,6 +164,7 @@ def guess_time_units(dates): def nctime_to_nptime(times): """Given an array of netCDF4.datetime objects, return an array of numpy.datetime64 objects of the same size""" + times = np.asarray(times) new = np.empty(times.shape, dtype='M8[ns]') for i, t in np.ndenumerate(times): new[i] = np.datetime64(datetime(*t.timetuple()[:6])) From e25cfcda4b3eb780914ab74aa5f279c5813b3ac8 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Wed, 14 May 2014 17:44:46 -0700 Subject: [PATCH 10/17] filter warnings from earlier tests --- test/test_conventions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_conventions.py b/test/test_conventions.py index 6daf1ff5fb3..04e0f5db79c 100644 --- a/test/test_conventions.py +++ b/test/test_conventions.py @@ -86,7 +86,9 @@ def 
test_cf_datetime(self): for calendar in ['standard', 'gregorian', 'proleptic_gregorian']: expected = nc4.num2date(num_dates, units, calendar) print(num_dates, units, calendar) - actual = conventions.decode_cf_datetime(num_dates, units, calendar) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'All-NaN') + actual = conventions.decode_cf_datetime(num_dates, units, calendar) if (isinstance(actual, np.ndarray) and np.issubdtype(actual.dtype, np.datetime64)): self.assertEqual(actual.dtype, np.dtype('M8[ns]')) From a3f21d328e13d4f8340a8c5030a71d3c6e89e856 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Thu, 15 May 2014 11:27:29 -0700 Subject: [PATCH 11/17] correct filter message for warnings --- test/test_conventions.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/test_conventions.py b/test/test_conventions.py index 04e0f5db79c..6c67e1c4563 100644 --- a/test/test_conventions.py +++ b/test/test_conventions.py @@ -87,7 +87,7 @@ def test_cf_datetime(self): expected = nc4.num2date(num_dates, units, calendar) print(num_dates, units, calendar) with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'All-NaN') + warnings.filterwarnings('ignore', 'Unable to decode time axis') actual = conventions.decode_cf_datetime(num_dates, units, calendar) if (isinstance(actual, np.ndarray) and np.issubdtype(actual.dtype, np.datetime64)): @@ -138,7 +138,7 @@ def test_decode_non_standard_calendar(self): calendar=calendar) expected = times.values with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'All-NaN') + warnings.filterwarnings('ignore', 'Unable to decode time axis') actual = conventions.decode_cf_datetime(noleap_time, units, calendar=calendar) self.assertEqual(actual.dtype, np.dtype('M8[ns]')) @@ -154,7 +154,7 @@ def test_decode_non_standard_calendar_single_element(self): for num_time in [735368, [735368], [[735368]]]: with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'All-NaN') + 
warnings.filterwarnings('ignore', 'Unable to decode time axis') actual = conventions.decode_cf_datetime(num_time, units, calendar=calendar) self.assertTrue(actual.dtype in [np.dtype('M8[ns]'), @@ -179,7 +179,7 @@ def test_decode_non_standard_calendar_multidim_time(self): expected1 = times1.values expected2 = times2.values with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'All-NaN') + warnings.filterwarnings('ignore', 'Unable to decode time axis') actual = conventions.decode_cf_datetime(mdim_time, units, calendar=calendar) self.assertEqual(actual.dtype, np.dtype('M8[ns]')) @@ -196,7 +196,7 @@ def test_decode_non_calendar_fallback(self): expected = nc4.num2date(num_times, units, calendar) with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'All-NaN') + warnings.filterwarnings('ignore', 'Unable to decode time axis') actual = conventions.decode_cf_datetime(num_times, units, calendar=calendar) self.assertEqual(actual.dtype, np.dtype('O')) @@ -212,7 +212,7 @@ def test_cf_datetime_nan(self): ['NaT', '2000-01-01T00:00:00Z', '2000-01-02T00:00:00Z']), ]: with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'All-NaN') + warnings.filterwarnings('ignore', 'Unable to decode time axis') actual = conventions.decode_cf_datetime(num_dates, units) expected = np.array(expected_list, dtype='datetime64[ns]') self.assertArrayEqual(expected, actual) From 69a9aa7700fccb76cd75ce12804bc34a71aed97c Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Thu, 15 May 2014 17:01:04 -0700 Subject: [PATCH 12/17] add test to catch fallback warnings, insure returning a numpy array during fallback of single element --- test/test_conventions.py | 59 +++++++++++++++++++++++++++------------- xray/conventions.py | 3 ++ 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/test/test_conventions.py b/test/test_conventions.py index 6c67e1c4563..56413c86ba1 100644 --- a/test/test_conventions.py +++ b/test/test_conventions.py @@ -146,19 +146,35 @@ def 
test_decode_non_standard_calendar(self): @requires_netCDF4 def test_decode_non_standard_calendar_single_element(self): - + units = 'days since 0001-01-01' for calendar in ['noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day']: - units = 'days since 0001-01-01' - for num_time in [735368, [735368], [[735368]]]: - with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'Unable to decode time axis') actual = conventions.decode_cf_datetime(num_time, units, calendar=calendar) - self.assertTrue(actual.dtype in [np.dtype('M8[ns]'), - np.dtype('O')]) + self.assertEqual(actual.dtype, np.dtype('M8[ns]')) + + @requires_netCDF4 + def test_decode_non_standard_calendar_single_element_fallback(self): + import netCDF4 as nc4 + + units = 'days since 0001-01-01' + dt = nc4.netcdftime.datetime(2001, 2, 29) + for calendar in ['360_day', 'all_leap', '366_day']: + num_time = nc4.date2num(dt, units, calendar) + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + actual = conventions.decode_cf_datetime(num_time, units, + calendar=calendar) + self.assertEqual(len(w), 1) + self.assertIn('Unable to decode time axis', + str(w[0].message)) + expected = np.asarray(nc4.num2date(num_time, units, calendar)) + print(num_time, calendar, actual, expected) + self.assertEqual(actual.dtype, np.dtype('O')) + self.assertEqual(expected, actual) @requires_netCDF4 def test_decode_non_standard_calendar_multidim_time(self): @@ -187,20 +203,25 @@ def test_decode_non_standard_calendar_multidim_time(self): self.assertArrayEqual(actual[:, 1], expected2) @requires_netCDF4 - def test_decode_non_calendar_fallback(self): + def test_decode_non_standard_calendar_fallback(self): import netCDF4 as nc4 - for year in [2010, 2011, 2012, 2013, 2014]: - calendar = '360_day' - units = 'days since {0}-01-01'.format(year) - num_times = np.arange(100) - expected = nc4.num2date(num_times, units, calendar) + for year in [2010, 2011, 2012, 2013, 2014]: # insure leap year doesn't 
matter + for calendar in ['360_day', '366_day', 'all_leap']: + calendar = '360_day' + units = 'days since {0}-01-01'.format(year) + num_times = np.arange(100) + expected = nc4.num2date(num_times, units, calendar) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + actual = conventions.decode_cf_datetime(num_times, units, + calendar=calendar) + self.assertEqual(len(w), 1) + self.assertIn('Unable to decode time axis', + str(w[0].message)) - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'Unable to decode time axis') - actual = conventions.decode_cf_datetime(num_times, units, - calendar=calendar) - self.assertEqual(actual.dtype, np.dtype('O')) - self.assertArrayEqual(actual, expected) + self.assertEqual(actual.dtype, np.dtype('O')) + self.assertArrayEqual(actual, expected) @requires_netCDF4 def test_cf_datetime_nan(self): @@ -212,7 +233,7 @@ def test_cf_datetime_nan(self): ['NaT', '2000-01-01T00:00:00Z', '2000-01-02T00:00:00Z']), ]: with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'Unable to decode time axis') + warnings.filterwarnings('ignore', 'All-NaN') actual = conventions.decode_cf_datetime(num_dates, units) expected = np.array(expected_list, dtype='datetime64[ns]') self.assertArrayEqual(expected, actual) diff --git a/xray/conventions.py b/xray/conventions.py index 4d12a989aa8..7bb0b53af86 100644 --- a/xray/conventions.py +++ b/xray/conventions.py @@ -100,11 +100,14 @@ def nan_safe_num2date(num): 'numpy.datetime64 objects, continuing using ' 'dummy netCDF4.datetime objects instead, reason:' '{0}'.format(e), RuntimeWarning, stacklevel=2) + dates = np.asarray(dates) else: warnings.warn('Unable to decode time axis into full ' 'numpy.datetime64 objects, continuing using dummy ' 'netCDF4.datetime objects instead, reason: dates out' ' of range', RuntimeWarning, stacklevel=2) + dates = np.asarray(dates) + else: # we can safely use np.datetime64 with nanosecond precision (pandas # likes ns 
precision so it can directly make DatetimeIndex objects) From 3dd0d69550efeebbb0fc2e3c182b8361052cadc5 Mon Sep 17 00:00:00 2001 From: Alistair Miles Date: Fri, 16 May 2014 01:03:48 +0100 Subject: [PATCH 13/17] fix python 3 build; right way to test exception is raised --- test/test_backends.py | 10 +++++----- xray/backends/netCDF4_.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/test_backends.py b/test/test_backends.py index 8d62ab867cc..c235ff4b929 100644 --- a/test/test_backends.py +++ b/test/test_backends.py @@ -179,15 +179,14 @@ def test_open_group(self): expected = Dataset() expected['x'] = ('time', x) - for group in 'foo', '/foo', '/foo/': # equivalent ways to specify group + # check equivalent ways to specify group + for group in 'foo', '/foo', 'foo/', '/foo/': actual = open_dataset(tmp_file, group=group) self.assertVariableEqual(actual['x'], expected['x']) # check that missing group raises appropriate exception - try: + with self.assertRaises(IOError): open_dataset(tmp_file, group='bar') - except IOError: - pass # expected def test_open_subgroup(self): # Create a netCDF file with a dataset stored within a group within a group @@ -205,7 +204,8 @@ def test_open_subgroup(self): expected = Dataset() expected['x'] = ('time', x) - for group in 'foo/bar', '/foo/bar', '/foo/bar/': # equivalent ways to specify group + # check equivalent ways to specify group + for group in 'foo/bar', '/foo/bar', 'foo/bar/', '/foo/bar/': actual = open_dataset(tmp_file, group=group) self.assertVariableEqual(actual['x'], expected['x']) diff --git a/xray/backends/netCDF4_.py b/xray/backends/netCDF4_.py index d66864e6409..0268a8c41ef 100644 --- a/xray/backends/netCDF4_.py +++ b/xray/backends/netCDF4_.py @@ -9,7 +9,7 @@ from xray.conventions import encode_cf_variable from xray.utils import FrozenOrderedDict, NDArrayMixin, as_array_or_item from xray import indexing -from xray.pycompat import iteritems +from xray.pycompat import iteritems, basestring class 
NetCDF4ArrayWrapper(NDArrayMixin): From 3e870e2d671c6543cbb3b0d5e85a4ccccf75e947 Mon Sep 17 00:00:00 2001 From: Alex Kleeman Date: Mon, 12 May 2014 04:18:37 -0700 Subject: [PATCH 14/17] Only copy datetime64 data if it is using non-nanosecond precision. --- test/test_utils.py | 16 ++++++++++++++++ xray/dataset.py | 1 - xray/utils.py | 4 ++-- xray/variable.py | 3 ++- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 78545630b49..8f2478c201a 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,4 +1,5 @@ from collections import OrderedDict +import datetime import numpy as np import pandas as pd @@ -6,6 +7,21 @@ from . import TestCase +class TestAsArray(TestCase): + + def test_safe_array(self): + values = np.arange(5.) + safe_values = utils.as_safe_array(values) + safe_values[0] = 5. + self.assertEqual(values[0], safe_values[0]) + + dates = [datetime.datetime(2010, 1, i + 1) for i in range(5)] + values = np.array(dates).astype(' Date: Tue, 13 May 2014 07:11:12 -0700 Subject: [PATCH 15/17] Convert datetime objects to datetime64 upon variable creation in an attempt to standardize time handling. NOTE: virtual variable slicing is now broken, so do not submit. --- test/test_utils.py | 15 +++++++++++--- test/test_variable.py | 46 +++++++++++++++++++++++++++++++++---------- xray/data_array.py | 2 ++ xray/utils.py | 8 ++++++-- xray/variable.py | 5 ++++- 5 files changed, 60 insertions(+), 16 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 8f2478c201a..2f51d29a25b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -7,9 +7,8 @@ from . import TestCase -class TestAsArray(TestCase): - - def test_safe_array(self): +class TestAsSafeArray(TestCase): + def test_as_safe_array(self): values = np.arange(5.) safe_values = utils.as_safe_array(values) safe_values[0] = 5. 
@@ -21,6 +20,16 @@ def test_safe_array(self): safe_values[0] = datetime.datetime(1982, 11, 20) self.assertEqual(values[0], safe_values[0]) + def test_as_safe_array_datetime(self): + dates = [datetime.datetime(2010, 1, i + 1) for i in range(5)] + values = np.array(dates) + safe_values = utils.as_safe_array(values) + safe_values[0] = datetime.datetime(1982, 11, 20) + # Note that this will fail, because as_safe_array converts + # datetime obecjts to datetime64 objects, which requires copying + #self.assertEqual(values.astype(' Date: Fri, 16 May 2014 00:52:52 -0700 Subject: [PATCH 16/17] Added utils.safe_timestamp() which avoids issues when datetime[ns] variables get cast to integers. --- test/test_variable.py | 6 ++---- xray/data_array.py | 2 -- xray/dataset.py | 4 ++-- xray/utils.py | 19 +++++++++++++++++++ 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/test/test_variable.py b/test/test_variable.py index 6a91492fb20..3c6353b31c2 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -468,6 +468,7 @@ def test_data(self): with self.assertRaisesRegexp(TypeError, 'cannot be modified'): x[:] = 0 + class TestCompatibleArray(TestCase): def test_as_compatible_array(self): @@ -480,10 +481,7 @@ def test_as_compatible_array(self): actual = _as_compatible_data(value) for attr in ['dtype', 'shape', 'size', 'ndim']: getattr(actual, attr) - try: - self.assertEqual(actual.dtype, dtype) - except: - import ipdb; ipdb.set_trace() + self.assertEqual(actual.dtype, dtype) # now do the same but as a 1-d array actual = _as_compatible_data([value]) for attr in ['dtype', 'shape', 'size', 'ndim']: diff --git a/xray/data_array.py b/xray/data_array.py index 173f7816e93..2a393cee75e 100644 --- a/xray/data_array.py +++ b/xray/data_array.py @@ -93,8 +93,6 @@ def _constructor(cls, dataset, name): """ obj = object.__new__(cls) if name not in dataset and name not in dataset.virtual_variables: - if name == 'time.dayofyear': - import ipdb; ipdb.set_trace() raise 
ValueError('name %r must be a variable in dataset %r' % (name, dataset)) obj._dataset = dataset diff --git a/xray/dataset.py b/xray/dataset.py index 64b63be6fde..fef85660f65 100644 --- a/xray/dataset.py +++ b/xray/dataset.py @@ -96,7 +96,7 @@ def virtual(self): """ def _castable_to_timestamp(obj): try: - pd.Timestamp(obj) + utils.safe_timestamp(obj) except: return False else: @@ -128,7 +128,7 @@ def __missing__(self, key): if isinstance(ref_var, variable.Coordinate): date = ref_var.as_index elif ref_var.ndim == 0: - date = pd.Timestamp(ref_var.values) + date = utils.safe_timestamp(ref_var.values) if suffix == 'season': # seasons = np.array(['DJF', 'MAM', 'JJA', 'SON']) diff --git a/xray/utils.py b/xray/utils.py index e716d4f04ff..41447d673d0 100644 --- a/xray/utils.py +++ b/xray/utils.py @@ -36,6 +36,25 @@ def __new__(cls, *args, **kwargs): return Wrapper +def safe_timestamp(x): + """ + This is a fix required since datetime64[ns] can occasionally get cast to + integers. Heres an example: + + > x + array(946684800000000000L, dtype='datetime64[ns]') + > pd.Timestamp(x) + ValueError: Could not construct Timestamp from argument + + + Oddly enough, recasting to datetime64 seems to fix things: + + > pd.Timestamp(np.datetime64(x)) + Timestamp('2000-01-01 00:00:00', tz=None) + """ + return pd.Timestamp(np.datetime64(x)) + + def as_safe_array(values, dtype=None): """Like np.asarray, but convert all datetime64 arrays to ns precision """ From d09708a119d8ca90298673ecd982414017ef53de Mon Sep 17 00:00:00 2001 From: Alex Kleeman Date: Fri, 16 May 2014 01:35:43 -0700 Subject: [PATCH 17/17] Updated test_variable to deal with python 3 unicode types --- test/test_variable.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/test/test_variable.py b/test/test_variable.py index 3c6353b31c2..09659c8b6f9 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -473,17 +473,19 @@ class TestCompatibleArray(TestCase): def 
test_as_compatible_array(self): d = datetime(2000, 1, 1) - for value, dtype in [(0, int), - (np.float32(0.5), np.float32), - ('foo', '|S3'), - (d, '