Skip to content

Only copy datetime64 data if it is using non-nanosecond precision. #125

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ nosetests.xml
.project
.pydevproject

# PyCharm
.idea

# xray specific
doc/_build
doc/generated
Expand Down
7 changes: 6 additions & 1 deletion doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@
print "numpy: %s, %s" % (numpy.__version__, numpy.__file__)
except ImportError:
print "no numpy"
try:
import scipy
print "scipy: %s, %s" % (scipy.__version__, scipy.__file__)
except ImportError:
print "no scipy"
try:
import pandas
print "pandas: %s, %s" % (pandas.__version__, pandas.__file__)
Expand Down Expand Up @@ -68,7 +73,7 @@ def __getattr__(cls, name):
else:
return Mock()

MOCK_MODULES = ['netCDF4', 'scipy', 'scipy.io']
MOCK_MODULES = ['netCDF4']

for mod_name in MOCK_MODULES:
sys.modules[mod_name] = Mock()
Expand Down
5 changes: 3 additions & 2 deletions doc/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# only the dependencies required to build xray's docs
# all others (netCDF4, scipy) are mocked out in conf.py
numpy==1.8.1
numpy>=1.7
ipython==2.0.0
pandas==0.13.1
six
python-dateutil
scipy
six
matplotlib
17 changes: 17 additions & 0 deletions doc/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,17 @@ skip missing values, but we expect to switch to NA skipping versions (like
pandas) in the future. For now, you can do NA skipping aggregate by passing
NA aware numpy functions to the :py:attr:`~xray.DataArray.reduce` method:

.. ipython:: python
:suppress:

# monkey patch numpy with nanmean from scipy.stats so the docs can build
# even with numpy 1.7 (np.nanmean first appears in numpy 1.8).
# this is to work around an unfortunate limitation of readthedocs/pip which
# stops us from upgrading both numpy and pandas.

from scipy import stats
np.nanmean = stats.nanmean

.. ipython:: python

foo.reduce(np.nanmean, 'time')
Expand Down Expand Up @@ -807,6 +818,12 @@ We can load NetCDF files to create a new Dataset using the
Attributes:
title: example attribute

A dataset can also be loaded from a specific group within a NetCDF
file. To load from a group, pass a ``group`` keyword argument to the
``open_dataset`` function. The group can be specified as a path-like
string, e.g., to access subgroup 'bar' within group 'foo' pass
'/foo/bar' as the ``group`` argument.

Data is loaded lazily from NetCDF files. You can manipulate, slice and subset
Dataset and DataArray objects, and no array values are loaded into memory until
necessary. For an example of how these lazy arrays work, since the OpenDAP
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ def write_version_py(filename=None):
classifiers=CLASSIFIERS,
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
install_requires=['numpy >= 1.8', 'pandas >= 0.13.1'],
tests_require=['mock >= 1.0.1', 'nose >= 1.0'],
install_requires=['numpy >= 1.7', 'pandas >= 0.13.1'],
tests_require=['nose >= 1.0'],
url=URL,
test_suite='nose.collector',
packages=['xray', 'xray.backends'])
45 changes: 45 additions & 0 deletions test/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,51 @@ def test_open_encodings(self):
if k in expected['time'].encoding}
self.assertDictEqual(actual_encoding, expected['time'].encoding)

def test_open_group(self):
    """Datasets stored inside a netCDF4 group can be opened via ``group=``."""
    # Create a netCDF file with a dataset stored within a group
    with create_tmp_file() as tmp_file:
        rootgrp = nc4.Dataset(tmp_file, 'w')
        foogrp = rootgrp.createGroup('foo')
        foogrp.createDimension('time', size=10)
        x = np.arange(10)
        foogrp.createVariable('x', np.int32, dimensions=('time',))
        foogrp.variables['x'][:] = x
        rootgrp.close()

        expected = Dataset()
        expected['x'] = ('time', x)

        # leading and trailing slashes should all resolve to the same group
        for group in ['foo', '/foo', 'foo/', '/foo/']:
            actual = open_dataset(tmp_file, group=group)
            self.assertVariableEqual(actual['x'], expected['x'])

        # a group that does not exist in the file raises IOError
        with self.assertRaises(IOError):
            open_dataset(tmp_file, group='bar')

def test_open_subgroup(self):
    """Datasets nested two groups deep can be opened with a path-like group."""
    # Create a netCDF file with a dataset stored within a group within a group
    with create_tmp_file() as tmp_file:
        rootgrp = nc4.Dataset(tmp_file, 'w')
        bargrp = rootgrp.createGroup('foo').createGroup('bar')
        bargrp.createDimension('time', size=10)
        x = np.arange(10)
        bargrp.createVariable('x', np.int32, dimensions=('time',))
        bargrp.variables['x'][:] = x
        rootgrp.close()

        expected = Dataset()
        expected['x'] = ('time', x)

        # every spelling of the path should resolve to the same subgroup
        for group in ['foo/bar', '/foo/bar', 'foo/bar/', '/foo/bar/']:
            actual = open_dataset(tmp_file, group=group)
            self.assertVariableEqual(actual['x'], expected['x'])

def test_dump_and_open_encodings(self):
# Create a netCDF file with explicit time units
# and make sure it makes it into the encodings
Expand Down
108 changes: 99 additions & 9 deletions test/test_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ def test_cf_datetime(self):
for calendar in ['standard', 'gregorian', 'proleptic_gregorian']:
expected = nc4.num2date(num_dates, units, calendar)
print(num_dates, units, calendar)
actual = conventions.decode_cf_datetime(num_dates, units, calendar)
with warnings.catch_warnings():
warnings.filterwarnings('ignore', 'Unable to decode time axis')
actual = conventions.decode_cf_datetime(num_dates, units, calendar)
if (isinstance(actual, np.ndarray)
and np.issubdtype(actual.dtype, np.datetime64)):
self.assertEqual(actual.dtype, np.dtype('M8[ns]'))
Expand All @@ -111,8 +113,6 @@ def test_cf_datetime(self):

@requires_netCDF4
def test_decoded_cf_datetime_array(self):
import netCDF4 as nc4

actual = conventions.DecodedCFDatetimeArray(
[0, 1, 2], 'days since 1900-01-01', 'standard')
expected = pd.date_range('1900-01-01', periods=3).values
Expand All @@ -125,13 +125,103 @@ def test_decoded_cf_datetime_array(self):
self.assertEqual(actual.dtype, np.dtype('datetime64[ns]'))
self.assertArrayEqual(actual, expected)

num_dates = [722000, 720000.5]
units = 'days since 0001-01-01 0:0:0'
@requires_netCDF4
def test_decode_non_standard_calendar(self):
    """Non-standard calendar dates inside the datetime64 range decode to M8[ns]."""
    import netCDF4 as nc4

    units = 'days since 0001-01-01'
    # hourly timestamps over one month, safely inside the datetime64 range
    times = pd.date_range('2001-04-01-00', end='2001-04-30-23', freq='H')
    expected = times.values
    for calendar in ['noleap', '365_day', '360_day', 'julian',
                     'all_leap', '366_day']:
        noleap_time = nc4.date2num(times.to_pydatetime(), units,
                                   calendar=calendar)
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'Unable to decode time axis')
            actual = conventions.decode_cf_datetime(noleap_time, units,
                                                    calendar=calendar)
        self.assertEqual(actual.dtype, np.dtype('M8[ns]'))
        self.assertArrayEqual(actual, expected)

@requires_netCDF4
def test_decode_non_standard_calendar_single_element(self):
    """Scalar and nested single-element inputs decode to datetime64[ns]."""
    units = 'days since 0001-01-01'
    calendars = ('noleap', '365_day', '360_day', 'julian', 'all_leap',
                 '366_day')
    # scalar, 1-d and 2-d single-element inputs should all work
    elements = (735368, [735368], [[735368]])
    for calendar in calendars:
        for num_time in elements:
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore',
                                        'Unable to decode time axis')
                decoded = conventions.decode_cf_datetime(num_time, units,
                                                         calendar=calendar)
            self.assertEqual(decoded.dtype, np.dtype('M8[ns]'))

@requires_netCDF4
def test_decode_non_standard_calendar_single_element_fallback(self):
    """Dates with no Gregorian equivalent fall back to object arrays.

    2001-02-29 exists only in the 360_day/all_leap/366_day calendars, so
    decoding must emit a warning and return netcdftime objects rather than
    datetime64[ns].
    """
    import netCDF4 as nc4

    units = 'days since 0001-01-01'
    dt = nc4.netcdftime.datetime(2001, 2, 29)
    for calendar in ['360_day', 'all_leap', '366_day']:
        num_time = nc4.date2num(dt, units, calendar)
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always')
            actual = conventions.decode_cf_datetime(num_time, units,
                                                    calendar=calendar)
            # exactly one warning about the failed datetime64 conversion
            self.assertEqual(len(w), 1)
            self.assertIn('Unable to decode time axis',
                          str(w[0].message))
        expected = np.asarray(nc4.num2date(num_time, units, calendar))
        self.assertEqual(actual.dtype, np.dtype('O'))
        self.assertEqual(expected, actual)

@requires_netCDF4
def test_decode_non_standard_calendar_multidim_time(self):
    """Decoding handles multidimensional (2-d) time coordinates.

    NOTE(review): the scraped diff interleaved stray removed lines here
    (referencing undefined ``num_dates``/``units``); this is the reconstructed
    added version of the method.
    """
    import netCDF4 as nc4

    calendar = 'noleap'
    units = 'days since 0001-01-01'
    times1 = pd.date_range('2001-04-01', end='2001-04-05', freq='D')
    times2 = pd.date_range('2001-05-01', end='2001-05-05', freq='D')
    noleap_time1 = nc4.date2num(times1.to_pydatetime(), units,
                                calendar=calendar)
    noleap_time2 = nc4.date2num(times2.to_pydatetime(), units,
                                calendar=calendar)
    # stack the two series as the columns of a 2-d time array
    mdim_time = np.empty((len(noleap_time1), 2), )
    mdim_time[:, 0] = noleap_time1
    mdim_time[:, 1] = noleap_time2

    expected1 = times1.values
    expected2 = times2.values
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', 'Unable to decode time axis')
        actual = conventions.decode_cf_datetime(mdim_time, units,
                                                calendar=calendar)
    self.assertEqual(actual.dtype, np.dtype('M8[ns]'))
    self.assertArrayEqual(actual[:, 0], expected1)
    self.assertArrayEqual(actual[:, 1], expected2)

@requires_netCDF4
def test_decode_non_standard_calendar_fallback(self):
    """Calendars whose dates have no Gregorian equivalent always fall back
    to object arrays of netcdftime datetimes, with a warning.

    Bug fix: the original body reassigned ``calendar = '360_day'`` inside the
    loop, clobbering the loop variable so only one calendar was ever tested.
    """
    import netCDF4 as nc4

    # ensure the choice of (leap) year doesn't matter
    for year in [2010, 2011, 2012, 2013, 2014]:
        for calendar in ['360_day', '366_day', 'all_leap']:
            units = 'days since {0}-01-01'.format(year)
            num_times = np.arange(100)
            expected = nc4.num2date(num_times, units, calendar)

            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter('always')
                actual = conventions.decode_cf_datetime(num_times, units,
                                                        calendar=calendar)
                # exactly one warning about falling back to object dtype
                self.assertEqual(len(w), 1)
                self.assertIn('Unable to decode time axis',
                              str(w[0].message))

            self.assertEqual(actual.dtype, np.dtype('O'))
            self.assertArrayEqual(actual, expected)

@requires_netCDF4
def test_cf_datetime_nan(self):
Expand Down
25 changes: 25 additions & 0 deletions test/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,36 @@
from collections import OrderedDict
import datetime
import numpy as np
import pandas as pd

from xray import utils
from . import TestCase


class TestAsSafeArray(TestCase):
    # Tests for utils.as_safe_array: arrays that already have a safe dtype
    # should be wrapped without copying, so mutations of the result are
    # visible through the original array.

    def test_as_safe_array(self):
        """Float and datetime64[ns] arrays are passed through without a copy."""
        values = np.arange(5.)
        safe_values = utils.as_safe_array(values)
        safe_values[0] = 5.
        # mutation through the "safe" array is visible in the original,
        # which shows no copy was made
        self.assertEqual(values[0], safe_values[0])

        dates = [datetime.datetime(2010, 1, i + 1) for i in range(5)]
        values = np.array(dates).astype('<M8[ns]')
        safe_values = utils.as_safe_array(values)
        safe_values[0] = datetime.datetime(1982, 11, 20)
        self.assertEqual(values[0], safe_values[0])

    def test_as_safe_array_datetime(self):
        """Object arrays of datetimes are converted to datetime64[ns]."""
        dates = [datetime.datetime(2010, 1, i + 1) for i in range(5)]
        values = np.array(dates)
        safe_values = utils.as_safe_array(values)
        safe_values[0] = datetime.datetime(1982, 11, 20)
        # Note that this will fail, because as_safe_array converts
        # datetime objects to datetime64 objects, which requires copying
        #self.assertEqual(values.astype('<M8[ns]')[0], safe_values[0])
        self.assertEqual(safe_values.dtype, '<M8[ns]')


class TestSafeCastToIndex(TestCase):
def test(self):
dates = pd.date_range('2000-01-01', periods=10)
Expand Down
46 changes: 36 additions & 10 deletions test/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from xray import Variable, Dataset, DataArray
from xray.variable import (Coordinate, as_variable, NumpyArrayAdapter,
PandasIndexAdapter)
PandasIndexAdapter, _as_compatible_data)

from . import TestCase, source_ndarray

Expand Down Expand Up @@ -38,11 +38,11 @@ def test_attrs(self):

def test_0d_data(self):
d = datetime(2000, 1, 1)
for value, dtype in [(0, int),
(np.float32(0.5), np.float32),
('foo', np.str_),
(d, None),
(np.datetime64(d), np.datetime64)]:
for value, dtype, expected in [(0, int, 0),
(np.float32(0.5), np.float32, np.float32(0.5)),
('foo', np.str_, 'foo'),
(d, None, np.datetime64(d, 'ns')),
(np.datetime64(d), np.datetime64, np.datetime64(d, 'ns'))]:
x = self.cls(['x'], [value])
# check array properties
self.assertEqual(x[0].shape, ())
Expand All @@ -52,13 +52,17 @@ def test_0d_data(self):
self.assertTrue(x.equals(x.copy()))
self.assertTrue(x.identical(x.copy()))
# check value is equal for both ndarray and Variable
self.assertEqual(x.values[0], value)
self.assertEqual(x[0].values, value)
self.assertEqual(x.values[0], expected)
self.assertEqual(x[0].values, expected)
# check type or dtype is consistent for both ndarray and Variable
if dtype is None:
# check output type instead of array dtype
self.assertEqual(type(x.values[0]), type(value))
self.assertEqual(type(x[0].values), type(value))
self.assertEqual(type(x.values[0]), type(expected))
if not x.dtype.kind == 'M':
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not so sure about making this exception... it leaves us returning non-datetime-datetimes like:

(Pdb) x[0].values
array(946684800000000000, dtype='datetime64[ns]')

# unfortunately if x contains datetime64 objects slicing
# out the scalar value will actually result in another
# ndarray, so we skip this test for dates.
self.assertEqual(type(x[0].values), type(expected))
else:
assert np.issubdtype(x.values[0].dtype, dtype), (x.values[0].dtype, dtype)
assert np.issubdtype(x[0].values.dtype, dtype), (x[0].values.dtype, dtype)
Expand Down Expand Up @@ -463,3 +467,25 @@ def test_data(self):
self.assertIsInstance(x._data, PandasIndexAdapter)
with self.assertRaisesRegexp(TypeError, 'cannot be modified'):
x[:] = 0


class TestCompatibleArray(TestCase):
    # Tests for variable._as_compatible_data: scalars and lists should be
    # wrapped into array-like objects exposing the standard ndarray
    # attributes, with datetimes normalized to datetime64[ns].
    # Fix: removed GitHub review-comment chrome that was interleaved into the
    # scraped block and broke the syntax.

    def test_as_compatible_array(self):
        d = datetime(2000, 1, 1)
        for value, dtypes in [(0, [int]),
                              (np.float32(0.5), [np.float32]),
                              # String dtype will depend on
                              # the version of python.
                              ('foo', ['|S3', '<U3']),
                              (d, ['<M8[ns]']),
                              (np.datetime64(d), ['<M8[ns]'])]:
            actual = _as_compatible_data(value)
            # the result must expose the basic ndarray attributes
            for attr in ['dtype', 'shape', 'size', 'ndim']:
                getattr(actual, attr)
            self.assertIn(actual.dtype, dtypes)
            # now do the same but as a 1-d array
            actual = _as_compatible_data([value])
            for attr in ['dtype', 'shape', 'size', 'ndim']:
                getattr(actual, attr)
            self.assertIn(actual.dtype, dtypes)
Loading