Skip to content

Only copy datetime64 data if it is using non-nanosecond precision. #125

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ nosetests.xml
.project
.pydevproject

# PyCharm
.idea

# xray specific
doc/_build
doc/generated
Expand Down
7 changes: 6 additions & 1 deletion doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@
print "numpy: %s, %s" % (numpy.__version__, numpy.__file__)
except ImportError:
print "no numpy"
try:
import scipy
print "scipy: %s, %s" % (scipy.__version__, scipy.__file__)
except ImportError:
print "no scipy"
try:
import pandas
print "pandas: %s, %s" % (pandas.__version__, pandas.__file__)
Expand Down Expand Up @@ -68,7 +73,7 @@ def __getattr__(cls, name):
else:
return Mock()

MOCK_MODULES = ['netCDF4', 'scipy', 'scipy.io']
MOCK_MODULES = ['netCDF4']

for mod_name in MOCK_MODULES:
sys.modules[mod_name] = Mock()
Expand Down
5 changes: 3 additions & 2 deletions doc/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# only the dependencies required to build xray's docs
# all others (netCDF4, scipy) are mocked out in conf.py
numpy==1.8.1
numpy>=1.7
ipython==2.0.0
pandas==0.13.1
six
python-dateutil
scipy
six
matplotlib
17 changes: 17 additions & 0 deletions doc/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,17 @@ skip missing values, but we expect to switch to NA skipping versions (like
pandas) in the future. For now, you can do NA skipping aggregate by passing
NA aware numpy functions to the :py:attr:`~xray.DataArray.reduce` method:

.. ipython:: python
:suppress:

# monkey patch numpy with nanmean from scipy.stats so the docs can build
# even with numpy 1.7 (np.nanmean first appears in numpy 1.8).
# this is to work around an unfortunate limitation of readthedocs/pip which
# stops us from upgrading both numpy and pandas.

from scipy import stats
np.nanmean = stats.nanmean

.. ipython:: python

foo.reduce(np.nanmean, 'time')
Expand Down Expand Up @@ -807,6 +818,12 @@ We can load NetCDF files to create a new Dataset using the
Attributes:
title: example attribute

A dataset can also be loaded from a specific group within a NetCDF
file. To load from a group, pass a ``group`` keyword argument to the
``open_dataset`` function. The group can be specified as a path-like
string, e.g., to access subgroup 'bar' within group 'foo' pass
'/foo/bar' as the ``group`` argument.

Data is loaded lazily from NetCDF files. You can manipulate, slice and subset
Dataset and DataArray objects, and no array values are loaded into memory until
necessary. For an example of how these lazy arrays work, since the OpenDAP
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ def write_version_py(filename=None):
classifiers=CLASSIFIERS,
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
install_requires=['numpy >= 1.8', 'pandas >= 0.13.1'],
tests_require=['mock >= 1.0.1', 'nose >= 1.0'],
install_requires=['numpy >= 1.7', 'pandas >= 0.13.1'],
tests_require=['nose >= 1.0'],
url=URL,
test_suite='nose.collector',
packages=['xray', 'xray.backends'])
45 changes: 45 additions & 0 deletions test/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,51 @@ def test_open_encodings(self):
if k in expected['time'].encoding}
self.assertDictEqual(actual_encoding, expected['time'].encoding)

def test_open_group(self):
    """Datasets stored inside a netCDF4 group can be opened via ``group=``."""
    # Create a netCDF file with a dataset stored within a group
    with create_tmp_file() as tmp_file:
        rootgrp = nc4.Dataset(tmp_file, 'w')
        foogrp = rootgrp.createGroup('foo')
        foogrp.createDimension('time', size=10)
        x = np.arange(10)
        foogrp.createVariable('x', np.int32, dimensions=('time',))
        foogrp.variables['x'][:] = x
        rootgrp.close()

        expected = Dataset()
        expected['x'] = ('time', x)

        # leading and trailing slashes should all resolve to the same group
        for group in ['foo', '/foo', 'foo/', '/foo/']:
            actual = open_dataset(tmp_file, group=group)
            self.assertVariableEqual(actual['x'], expected['x'])

        # a group that does not exist in the file raises IOError
        with self.assertRaises(IOError):
            open_dataset(tmp_file, group='bar')

def test_open_subgroup(self):
    """Datasets nested two groups deep can be opened with a path-like group."""
    # Create a netCDF file with a dataset stored within a group within a group
    with create_tmp_file() as tmp_file:
        rootgrp = nc4.Dataset(tmp_file, 'w')
        bargrp = rootgrp.createGroup('foo').createGroup('bar')
        bargrp.createDimension('time', size=10)
        x = np.arange(10)
        bargrp.createVariable('x', np.int32, dimensions=('time',))
        bargrp.variables['x'][:] = x
        rootgrp.close()

        expected = Dataset()
        expected['x'] = ('time', x)

        # every spelling of the path should resolve to the same subgroup
        for group in ['foo/bar', '/foo/bar', 'foo/bar/', '/foo/bar/']:
            actual = open_dataset(tmp_file, group=group)
            self.assertVariableEqual(actual['x'], expected['x'])

def test_dump_and_open_encodings(self):
# Create a netCDF file with explicit time units
# and make sure it makes it into the encodings
Expand Down
108 changes: 99 additions & 9 deletions test/test_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ def test_cf_datetime(self):
for calendar in ['standard', 'gregorian', 'proleptic_gregorian']:
expected = nc4.num2date(num_dates, units, calendar)
print(num_dates, units, calendar)
actual = conventions.decode_cf_datetime(num_dates, units, calendar)
with warnings.catch_warnings():
warnings.filterwarnings('ignore', 'Unable to decode time axis')
actual = conventions.decode_cf_datetime(num_dates, units, calendar)
if (isinstance(actual, np.ndarray)
and np.issubdtype(actual.dtype, np.datetime64)):
self.assertEqual(actual.dtype, np.dtype('M8[ns]'))
Expand All @@ -111,8 +113,6 @@ def test_cf_datetime(self):

@requires_netCDF4
def test_decoded_cf_datetime_array(self):
import netCDF4 as nc4

actual = conventions.DecodedCFDatetimeArray(
[0, 1, 2], 'days since 1900-01-01', 'standard')
expected = pd.date_range('1900-01-01', periods=3).values
Expand All @@ -125,13 +125,103 @@ def test_decoded_cf_datetime_array(self):
self.assertEqual(actual.dtype, np.dtype('datetime64[ns]'))
self.assertArrayEqual(actual, expected)

num_dates = [722000, 720000.5]
units = 'days since 0001-01-01 0:0:0'
@requires_netCDF4
def test_decode_non_standard_calendar(self):
    """Non-standard calendar dates inside the datetime64 range decode to M8[ns]."""
    import netCDF4 as nc4

    units = 'days since 0001-01-01'
    # hourly timestamps over one month, safely inside the datetime64 range
    times = pd.date_range('2001-04-01-00', end='2001-04-30-23', freq='H')
    expected = times.values
    for calendar in ['noleap', '365_day', '360_day', 'julian',
                     'all_leap', '366_day']:
        noleap_time = nc4.date2num(times.to_pydatetime(), units,
                                   calendar=calendar)
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'Unable to decode time axis')
            actual = conventions.decode_cf_datetime(noleap_time, units,
                                                    calendar=calendar)
        self.assertEqual(actual.dtype, np.dtype('M8[ns]'))
        self.assertArrayEqual(actual, expected)

@requires_netCDF4
def test_decode_non_standard_calendar_single_element(self):
    """Scalar and nested single-element inputs decode to datetime64[ns]."""
    units = 'days since 0001-01-01'
    calendars = ('noleap', '365_day', '360_day', 'julian', 'all_leap',
                 '366_day')
    # scalar, 1-d and 2-d single-element inputs should all work
    elements = (735368, [735368], [[735368]])
    for calendar in calendars:
        for num_time in elements:
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore',
                                        'Unable to decode time axis')
                decoded = conventions.decode_cf_datetime(num_time, units,
                                                         calendar=calendar)
            self.assertEqual(decoded.dtype, np.dtype('M8[ns]'))

@requires_netCDF4
def test_decode_non_standard_calendar_single_element_fallback(self):
    """Dates with no Gregorian equivalent fall back to object arrays.

    2001-02-29 exists only in the 360_day/all_leap/366_day calendars, so
    decoding must emit a warning and return netcdftime objects rather than
    datetime64[ns].
    """
    import netCDF4 as nc4

    units = 'days since 0001-01-01'
    dt = nc4.netcdftime.datetime(2001, 2, 29)
    for calendar in ['360_day', 'all_leap', '366_day']:
        num_time = nc4.date2num(dt, units, calendar)
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always')
            actual = conventions.decode_cf_datetime(num_time, units,
                                                    calendar=calendar)
            # exactly one warning about the failed datetime64 conversion
            self.assertEqual(len(w), 1)
            self.assertIn('Unable to decode time axis',
                          str(w[0].message))
        expected = np.asarray(nc4.num2date(num_time, units, calendar))
        self.assertEqual(actual.dtype, np.dtype('O'))
        self.assertEqual(expected, actual)

@requires_netCDF4
def test_decode_non_standard_calendar_multidim_time(self):
    """Decoding handles multidimensional (2-d) time coordinates.

    NOTE(review): the scraped diff interleaved stray removed lines here
    (referencing undefined ``num_dates``/``units``); this is the reconstructed
    added version of the method.
    """
    import netCDF4 as nc4

    calendar = 'noleap'
    units = 'days since 0001-01-01'
    times1 = pd.date_range('2001-04-01', end='2001-04-05', freq='D')
    times2 = pd.date_range('2001-05-01', end='2001-05-05', freq='D')
    noleap_time1 = nc4.date2num(times1.to_pydatetime(), units,
                                calendar=calendar)
    noleap_time2 = nc4.date2num(times2.to_pydatetime(), units,
                                calendar=calendar)
    # stack the two series as the columns of a 2-d time array
    mdim_time = np.empty((len(noleap_time1), 2), )
    mdim_time[:, 0] = noleap_time1
    mdim_time[:, 1] = noleap_time2

    expected1 = times1.values
    expected2 = times2.values
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', 'Unable to decode time axis')
        actual = conventions.decode_cf_datetime(mdim_time, units,
                                                calendar=calendar)
    self.assertEqual(actual.dtype, np.dtype('M8[ns]'))
    self.assertArrayEqual(actual[:, 0], expected1)
    self.assertArrayEqual(actual[:, 1], expected2)

@requires_netCDF4
def test_decode_non_standard_calendar_fallback(self):
    """Calendars whose dates have no Gregorian equivalent always fall back
    to object arrays of netcdftime datetimes, with a warning.

    Bug fix: the original body reassigned ``calendar = '360_day'`` inside the
    loop, clobbering the loop variable so only one calendar was ever tested.
    """
    import netCDF4 as nc4

    # ensure the choice of (leap) year doesn't matter
    for year in [2010, 2011, 2012, 2013, 2014]:
        for calendar in ['360_day', '366_day', 'all_leap']:
            units = 'days since {0}-01-01'.format(year)
            num_times = np.arange(100)
            expected = nc4.num2date(num_times, units, calendar)

            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter('always')
                actual = conventions.decode_cf_datetime(num_times, units,
                                                        calendar=calendar)
                # exactly one warning about falling back to object dtype
                self.assertEqual(len(w), 1)
                self.assertIn('Unable to decode time axis',
                              str(w[0].message))

            self.assertEqual(actual.dtype, np.dtype('O'))
            self.assertArrayEqual(actual, expected)

@requires_netCDF4
def test_cf_datetime_nan(self):
Expand Down
25 changes: 25 additions & 0 deletions test/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,36 @@
from collections import OrderedDict
import datetime
import numpy as np
import pandas as pd

from xray import utils
from . import TestCase


class TestAsSafeArray(TestCase):
    # Tests for utils.as_safe_array: arrays that already have a safe dtype
    # should be wrapped without copying, so mutations of the result are
    # visible through the original array.

    def test_as_safe_array(self):
        """Float and datetime64[ns] arrays are passed through without a copy."""
        values = np.arange(5.)
        safe_values = utils.as_safe_array(values)
        safe_values[0] = 5.
        # mutation through the "safe" array is visible in the original,
        # which shows no copy was made
        self.assertEqual(values[0], safe_values[0])

        dates = [datetime.datetime(2010, 1, i + 1) for i in range(5)]
        values = np.array(dates).astype('<M8[ns]')
        safe_values = utils.as_safe_array(values)
        safe_values[0] = datetime.datetime(1982, 11, 20)
        self.assertEqual(values[0], safe_values[0])

    def test_as_safe_array_datetime(self):
        """Object arrays of datetimes are converted to datetime64[ns]."""
        dates = [datetime.datetime(2010, 1, i + 1) for i in range(5)]
        values = np.array(dates)
        safe_values = utils.as_safe_array(values)
        safe_values[0] = datetime.datetime(1982, 11, 20)
        # Note that this will fail, because as_safe_array converts
        # datetime objects to datetime64 objects, which requires copying
        #self.assertEqual(values.astype('<M8[ns]')[0], safe_values[0])
        self.assertEqual(safe_values.dtype, '<M8[ns]')


class TestSafeCastToIndex(TestCase):
def test(self):
dates = pd.date_range('2000-01-01', periods=10)
Expand Down
46 changes: 36 additions & 10 deletions test/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from xray import Variable, Dataset, DataArray
from xray.variable import (Coordinate, as_variable, NumpyArrayAdapter,
PandasIndexAdapter)
PandasIndexAdapter, _as_compatible_data)

from . import TestCase, source_ndarray

Expand Down Expand Up @@ -38,11 +38,11 @@ def test_attrs(self):

def test_0d_data(self):
d = datetime(2000, 1, 1)
for value, dtype in [(0, int),
(np.float32(0.5), np.float32),
('foo', np.str_),
(d, None),
(np.datetime64(d), np.datetime64)]:
for value, dtype, expected in [(0, int, 0),
(np.float32(0.5), np.float32, np.float32(0.5)),
('foo', np.str_, 'foo'),
(d, None, np.datetime64(d, 'ns')),
(np.datetime64(d), np.datetime64, np.datetime64(d, 'ns'))]:
x = self.cls(['x'], [value])
# check array properties
self.assertEqual(x[0].shape, ())
Expand All @@ -52,13 +52,17 @@ def test_0d_data(self):
self.assertTrue(x.equals(x.copy()))
self.assertTrue(x.identical(x.copy()))
# check value is equal for both ndarray and Variable
self.assertEqual(x.values[0], value)
self.assertEqual(x[0].values, value)
self.assertEqual(x.values[0], expected)
self.assertEqual(x[0].values, expected)
# check type or dtype is consistent for both ndarray and Variable
if dtype is None:
# check output type instead of array dtype
self.assertEqual(type(x.values[0]), type(value))
self.assertEqual(type(x[0].values), type(value))
self.assertEqual(type(x.values[0]), type(expected))
if not x.dtype.kind == 'M':
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not so sure about making this exception... it leaves us returning non-datetime-datetimes like:

(Pdb) x[0].values
array(946684800000000000, dtype='datetime64[ns]')

# unfortunately if x contains datetime64 objects slicing
# out the scalar value will actually result in another
# ndarray, so we skip this test for dates.
self.assertEqual(type(x[0].values), type(expected))
else:
assert np.issubdtype(x.values[0].dtype, dtype), (x.values[0].dtype, dtype)
assert np.issubdtype(x[0].values.dtype, dtype), (x[0].values.dtype, dtype)
Expand Down Expand Up @@ -463,3 +467,25 @@ def test_data(self):
self.assertIsInstance(x._data, PandasIndexAdapter)
with self.assertRaisesRegexp(TypeError, 'cannot be modified'):
x[:] = 0


class TestCompatibleArray(TestCase):
    # Tests for variable._as_compatible_data: scalars and lists should be
    # wrapped into array-like objects exposing the standard ndarray
    # attributes, with datetimes normalized to datetime64[ns].
    # Fix: removed GitHub review-comment chrome that was interleaved into the
    # scraped block and broke the syntax.

    def test_as_compatible_array(self):
        d = datetime(2000, 1, 1)
        for value, dtypes in [(0, [int]),
                              (np.float32(0.5), [np.float32]),
                              # String dtype will depend on
                              # the version of python.
                              ('foo', ['|S3', '<U3']),
                              (d, ['<M8[ns]']),
                              (np.datetime64(d), ['<M8[ns]'])]:
            actual = _as_compatible_data(value)
            # the result must expose the basic ndarray attributes
            for attr in ['dtype', 'shape', 'size', 'ndim']:
                getattr(actual, attr)
            self.assertIn(actual.dtype, dtypes)
            # now do the same but as a 1-d array
            actual = _as_compatible_data([value])
            for attr in ['dtype', 'shape', 'size', 'ndim']:
                getattr(actual, attr)
            self.assertIn(actual.dtype, dtypes)
Loading