Skip to content

Commit 71f6662

Browse files
committed
Fix concatenating of Variables with dtype=datetime64
This is a more direct fix for the bug @akleeman reported pydata#125. Previously, it did not always work to modify Variable.values in-place (as done in Variable.concat), because as_array_or_item could return a copy of an ndarray instead of a view. @akleeman I didn't incorporate your tests for the
1 parent c425967 commit 71f6662

File tree

4 files changed

+127
-47
lines changed

4 files changed

+127
-47
lines changed

test/test_variable.py

Lines changed: 103 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from xray import Variable, Dataset, DataArray
1010
from xray.variable import (Coordinate, as_variable, NumpyArrayAdapter,
1111
PandasIndexAdapter)
12+
from xray.pycompat import PY3
1213

1314
from . import TestCase, source_ndarray
1415

@@ -36,32 +37,85 @@ def test_attrs(self):
3637
v.attrs['foo'] = 'baz'
3738
self.assertEqual(v.attrs['foo'], 'baz')
3839

39-
def test_0d_data(self):
40-
d = datetime(2000, 1, 1)
41-
for value, dtype in [(0, int),
42-
(np.float32(0.5), np.float32),
43-
('foo', np.str_),
44-
(d, None),
45-
(np.datetime64(d), np.datetime64)]:
40+
def assertIndexedLikeNDArray(self, variable, expected_value0,
41+
expected_dtype=None):
42+
"""Given a 1-dimensional variable, verify that the variable is indexed
43+
like a numpy.ndarray.
44+
"""
45+
self.assertEqual(variable[0].shape, ())
46+
self.assertEqual(variable[0].ndim, 0)
47+
self.assertEqual(variable[0].size, 1)
48+
# test identity
49+
self.assertTrue(variable.equals(variable.copy()))
50+
self.assertTrue(variable.identical(variable.copy()))
51+
# check value is equal for both ndarray and Variable
52+
self.assertEqual(variable.values[0], expected_value0)
53+
self.assertEqual(variable[0].values, expected_value0)
54+
# check type or dtype is consistent for both ndarray and Variable
55+
if expected_dtype is None:
56+
# check output type instead of array dtype
57+
self.assertEqual(type(variable.values[0]), type(expected_value0))
58+
self.assertEqual(type(variable[0].values), type(expected_value0))
59+
else:
60+
self.assertEqual(variable.values[0].dtype, expected_dtype)
61+
self.assertEqual(variable[0].values.dtype, expected_dtype)
62+
63+
def test_index_0d_int(self):
64+
for value, dtype in [(0, np.int_),
65+
(np.int32(0), np.int32)]:
66+
x = self.cls(['x'], [value])
67+
self.assertIndexedLikeNDArray(x, value, dtype)
68+
69+
def test_index_0d_float(self):
70+
for value, dtype in [(0.5, np.float_),
71+
(np.float32(0.5), np.float32)]:
72+
x = self.cls(['x'], [value])
73+
self.assertIndexedLikeNDArray(x, value, dtype)
74+
75+
def test_index_0d_string(self):
76+
for value, dtype in [('foo', np.dtype('U3' if PY3 else 'S3')),
77+
(u'foo', np.dtype('U3'))]:
4678
x = self.cls(['x'], [value])
47-
# check array properties
48-
self.assertEqual(x[0].shape, ())
49-
self.assertEqual(x[0].ndim, 0)
50-
self.assertEqual(x[0].size, 1)
51-
# test identity
52-
self.assertTrue(x.equals(x.copy()))
53-
self.assertTrue(x.identical(x.copy()))
54-
# check value is equal for both ndarray and Variable
55-
self.assertEqual(x.values[0], value)
56-
self.assertEqual(x[0].values, value)
57-
# check type or dtype is consistent for both ndarray and Variable
58-
if dtype is None:
59-
# check output type instead of array dtype
60-
self.assertEqual(type(x.values[0]), type(value))
61-
self.assertEqual(type(x[0].values), type(value))
62-
else:
63-
assert np.issubdtype(x.values[0].dtype, dtype), (x.values[0].dtype, dtype)
64-
assert np.issubdtype(x[0].values.dtype, dtype), (x[0].values.dtype, dtype)
79+
self.assertIndexedLikeNDArray(x, value, dtype)
80+
81+
def test_index_0d_datetime(self):
82+
d = datetime(2000, 1, 1)
83+
x = self.cls(['x'], [d])
84+
self.assertIndexedLikeNDArray(x, d)
85+
86+
x = self.cls(['x'], [np.datetime64(d)])
87+
self.assertIndexedLikeNDArray(x, np.datetime64(d), 'datetime64[ns]')
88+
89+
x = self.cls(['x'], pd.DatetimeIndex([d]))
90+
self.assertIndexedLikeNDArray(x, np.datetime64(d), 'datetime64[ns]')
91+
92+
def test_index_0d_object(self):
93+
94+
class HashableItemWrapper(object):
95+
def __init__(self, item):
96+
self.item = item
97+
98+
def __eq__(self, other):
99+
return self.item == other.item
100+
101+
def __hash__(self):
102+
return hash(self.item)
103+
104+
def __repr__(self):
105+
return '%s(item=%r)' % (type(self).__name__, self.item)
106+
107+
item = HashableItemWrapper((1, 2, 3))
108+
x = self.cls('x', [item])
109+
self.assertIndexedLikeNDArray(x, item)
110+
111+
def test_index_and_concat_datetime64(self):
112+
# regression test for #125
113+
expected = self.cls('t', pd.date_range('2011-09-01', periods=10))
114+
for times in [[expected[i] for i in range(10)],
115+
[expected[[i]] for i in range(10)]]:
116+
actual = Variable.concat(times, 't')
117+
self.assertArrayEqual(expected, actual)
118+
self.assertEqual(expected.dtype, actual.dtype)
65119

66120
def test_0d_time_data(self):
67121
# regression test for #105
@@ -229,6 +283,30 @@ def test_item(self):
229283
self.assertEqual(v.item(), 0)
230284
self.assertIs(type(v.item()), float)
231285

286+
def test_datetime64_precision(self):
287+
# verify that datetime64 is always converted to ns precision
288+
values = np.datetime64('2000-01-01T00')
289+
v = Variable([], values)
290+
self.assertEqual(v.dtype, np.dtype('datetime64[ns]'))
291+
self.assertEqual(v.values, values)
292+
self.assertEqual(v.values.dtype, np.dtype('datetime64[ns]'))
293+
294+
values = pd.date_range('2000-01-01', periods=3).values.astype(
295+
'datetime64[s]')
296+
v = Variable(['t'], values)
297+
self.assertEqual(v.dtype, np.dtype('datetime64[ns]'))
298+
self.assertArrayEqual(v.values, values)
299+
self.assertEqual(v.values.dtype, np.dtype('datetime64[ns]'))
300+
301+
def test_0d_str(self):
302+
v = Variable([], u'foo')
303+
self.assertEqual(v.dtype, np.dtype('U3'))
304+
self.assertEqual(v.values, 'foo')
305+
306+
v = Variable([], np.string_('foo'))
307+
self.assertEqual(v.dtype, np.dtype('S3'))
308+
self.assertEqual(v.values, bytes('foo', 'ascii') if PY3 else 'foo')
309+
232310
def test_equals_and_identical(self):
233311
d = np.random.rand(10, 3)
234312
d[0, 0] = np.nan

xray/backends/netCDF4_.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from .netcdf3 import encode_nc3_variable
88
import xray
99
from xray.conventions import encode_cf_variable
10-
from xray.utils import FrozenOrderedDict, NDArrayMixin, as_array_or_item
10+
from xray.utils import FrozenOrderedDict, NDArrayMixin
1111
from xray import indexing
1212
from xray.pycompat import iteritems, basestring
1313

@@ -31,7 +31,7 @@ def __getitem__(self, key):
3131
# work around for netCDF4-python's broken handling of 0-d
3232
# arrays (slicing them always returns a 1-dimensional array):
3333
# https://github.com/Unidata/netcdf4-python/pull/220
34-
data = as_array_or_item(np.asscalar(self.array[key]))
34+
data = np.asscalar(self.array[key])
3535
else:
3636
data = self.array[key]
3737
return data

xray/utils.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import operator
55
import warnings
66
from collections import OrderedDict, Mapping, MutableMapping
7-
from datetime import datetime
87

98
import numpy as np
109
import pandas as pd
@@ -46,23 +45,23 @@ def as_safe_array(values, dtype=None):
4645
return values
4746

4847

49-
def as_array_or_item(values, dtype=None):
50-
"""Return the given values as a numpy array of the indicated dtype, or as
51-
an individual value if it's a 0-dimensional object array or datetime.
48+
def as_array_or_item(data):
49+
"""Return the given values as a numpy array, or as an individual item if
50+
it's a 0-dimensional object array or datetime64.
51+
52+
Importantly, this function does not copy data if it is already an ndarray -
53+
otherwise, it will not be possible to update Variable values in place.
5254
"""
53-
if isinstance(values, datetime):
54-
# shortcut because if you try to make a datetime or Timestamp object
55-
# into an array with the proper dtype, it is liable to be silently
56-
# converted into an integer instead :(
57-
return values
58-
values = as_safe_array(values, dtype=dtype)
59-
if values.ndim == 0 and values.dtype.kind == 'O':
60-
# unpack 0d object arrays to be consistent with numpy
61-
values = values.item()
62-
if isinstance(values, pd.Timestamp):
63-
# turn Timestamps back into datetime64 objects
64-
values = np.datetime64(values, 'ns')
65-
return values
55+
data = np.asarray(data)
56+
if data.ndim == 0:
57+
if data.dtype.kind == 'O':
58+
# unpack 0d object arrays to be consistent with numpy
59+
data = data.item()
60+
elif data.dtype.kind == 'M':
61+
# convert to a np.datetime64 object, because 0-dimensional ndarrays
62+
# with dtype=datetime64 are broken :(
63+
data = np.datetime64(data, 'ns')
64+
return data
6665

6766

6867
def squeeze(xray_obj, dimensions, dimension=None):

xray/variable.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def _as_compatible_data(data):
5959
# numeric type like np.float32
6060
required = ['dtype', 'shape', 'size', 'ndim']
6161
if (any(not hasattr(data, attr) for attr in required)
62-
or isinstance(data, np.string_)):
62+
or isinstance(data, np.datetime64)):
6363
data = utils.as_safe_array(data)
6464
elif not isinstance(data, (pd.Index, indexing.LazilyIndexedArray)):
6565
try:
@@ -130,8 +130,9 @@ def __getitem__(self, key):
130130
# unpack key so it can index a pandas.Index object (pandas.Index
131131
# objects don't like tuples)
132132
key, = key
133+
133134
if isinstance(key, (int, np.integer)):
134-
return utils.as_array_or_item(self.array[key], dtype=self.dtype)
135+
value = np.asarray(self.array[key], dtype=self.dtype)
135136
else:
136137
if isinstance(key, slice) and key == slice(None):
137138
# pandas<0.14 does dtype inference when slicing; we would like
@@ -140,7 +141,9 @@ def __getitem__(self, key):
140141
arr = self.array
141142
else:
142143
arr = self.array[key]
143-
return PandasIndexAdapter(arr, dtype=self.dtype)
144+
value = PandasIndexAdapter(arr, dtype=self.dtype)
145+
146+
return value
144147

145148
def __repr__(self):
146149
return ('%s(array=%r, dtype=%r)'

0 commit comments

Comments
 (0)