From 596d800103b2286fa6bc4bb3b0a5fa54029cec50 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 6 Dec 2018 12:40:42 -0500 Subject: [PATCH 1/7] Cast datetime and timedelta to signed 64-bit int `NaT` is stored as the minimum signed 64-bit integer (`-9223372036854775808`, i.e. only the sign bit is set). As a result, casting to an unsigned integral type fails and throws an error. However, casting to a signed integral type does not have this problem and proceeds without issues. --- zarr/meta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/meta.py b/zarr/meta.py index 7984efb701..c90c12ff38 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -209,6 +209,6 @@ def encode_fill_value(v, dtype): elif dtype.kind == 'U': return v elif dtype.kind in 'mM': - return int(v.view('u8')) + return int(v.view('i8')) else: return v From 175cee64ed25b39e0b27b79c72ff07fc38aa96d7 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 6 Dec 2018 12:40:42 -0500 Subject: [PATCH 2/7] Update datetime/timedelta test to use signed ints --- zarr/tests/test_core.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 544ec95c41..cbad222edb 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -955,8 +955,9 @@ def test_dtypes(self): dtype = '{}8[{}]'.format(base_type, resolution) z = self.create_array(shape=100, dtype=dtype, fill_value=0) assert z.dtype == np.dtype(dtype) - a = np.random.randint(0, np.iinfo('u8').max, size=z.shape[0], - dtype='u8').view(dtype) + a = np.random.randint(np.iinfo('i8').min, np.iinfo('i8').max, + size=z.shape[0], + dtype='i8').view(dtype) z[:] = a assert_array_equal(a, z[:]) From d508289401c5ff52e61221b1065b3e2d82e7aa73 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 6 Dec 2018 12:40:43 -0500 Subject: [PATCH 3/7] Test encode/decode of a datetime/timedelta array Use a structured array with datetime and timedelta values and a fill value of NaT to test a bunch of different workarounds for encoding and decoding datetime and 
timedelta values and arrays. --- zarr/tests/test_meta.py | 54 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/zarr/tests/test_meta.py b/zarr/tests/test_meta.py index 904c2146a7..aa1fa78bb7 100644 --- a/zarr/tests/test_meta.py +++ b/zarr/tests/test_meta.py @@ -116,6 +116,60 @@ def test_encode_decode_array_2(): assert [df.get_config()] == meta_dec['filters'] +def test_encode_decode_array_datetime_timedelta(): + + # some variations + for k in ['m8[s]', 'M8[s]']: + compressor = Blosc(cname='lz4', clevel=3, shuffle=2) + dtype = np.dtype(k) + fill_value = np.full((), np.nan, dtype=dtype)[()] + meta = dict( + shape=(100, 100), + chunks=(10, 10), + dtype=dtype, + compressor=compressor.get_config(), + fill_value=fill_value, + order=dtype.char, + filters=[] + ) + + meta_json = '''{ + "chunks": [10, 10], + "compressor": { + "id": "blosc", + "clevel": 3, + "cname": "lz4", + "shuffle": 2, + "blocksize": 0 + }, + "dtype": "%s", + "fill_value": -9223372036854775808, + "filters": [], + "order": "%s", + "shape": [100, 100], + "zarr_format": %s + }''' % (dtype.str, dtype.char, ZARR_FORMAT) + + # test encoding + meta_enc = encode_array_metadata(meta) + assert_json_equal(meta_json, meta_enc) + + # test decoding + meta_dec = decode_array_metadata(meta_enc) + assert ZARR_FORMAT == meta_dec['zarr_format'] + assert meta['shape'] == meta_dec['shape'] + assert meta['chunks'] == meta_dec['chunks'] + assert meta['dtype'] == meta_dec['dtype'] + assert meta['compressor'] == meta_dec['compressor'] + assert meta['order'] == meta_dec['order'] + # Based off of this SO answer: https://stackoverflow.com/a/49972198 + assert np.all( + fill_value.view((np.uint8, fill_value.itemsize)) == + meta_dec['fill_value'].view((np.uint8, meta_dec['fill_value'].itemsize)) + ) + assert [] == meta_dec['filters'] + + def test_encode_decode_array_dtype_shape(): meta = dict( From aed79bc9ecea31735b026970e533ebf13958b0ad Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 6 Dec 
2018 12:40:44 -0500 Subject: [PATCH 4/7] Note improved `NaT` handling in the changelog --- docs/release.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index 45eb9c8a49..c87ebff589 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -41,6 +41,11 @@ Maintenance * CI and test environments have been upgraded to include Python 3.7, drop Python 3.4, and upgrade all pinned package requirements. :issue:`308`. +* Corrects handling of ``NaT`` in ``datetime64`` and ``timedelta64`` in various + compressors (by :user:`John Kirkham <jakirkham>`; :issue:`344`). + +Acknowledgments +~~~~~~~~~~~~~~~ .. _release_2.2.0: From 8616880b1a72c1337dbb227dedc5e047ccb18003 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 7 Dec 2018 09:44:35 -0500 Subject: [PATCH 5/7] Use `dtype` to cast `NaT` explicitly --- zarr/tests/test_meta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_meta.py b/zarr/tests/test_meta.py index aa1fa78bb7..12dda299c8 100644 --- a/zarr/tests/test_meta.py +++ b/zarr/tests/test_meta.py @@ -122,7 +122,7 @@ def test_encode_decode_array_datetime_timedelta(): for k in ['m8[s]', 'M8[s]']: compressor = Blosc(cname='lz4', clevel=3, shuffle=2) dtype = np.dtype(k) - fill_value = np.full((), np.nan, dtype=dtype)[()] + fill_value = dtype.type("NaT") meta = dict( shape=(100, 100), chunks=(10, 10), From 09c046f330abe5721e5582e9e4a64afccd335b93 Mon Sep 17 00:00:00 2001 From: Alistair Miles Date: Fri, 7 Dec 2018 18:26:26 +0000 Subject: [PATCH 6/7] test NaT as fill_value for zarr.full() --- zarr/tests/test_creation.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py index ef2232c234..57ebc44980 100644 --- a/zarr/tests/test_creation.py +++ b/zarr/tests/test_creation.py @@ -134,6 +134,12 @@ def test_full(): # nan z = full(100, chunks=10, fill_value=np.nan, dtype='f8') assert np.all(np.isnan(z[:])) + + # NaT + z = full(100, chunks=10, 
fill_value='NaT', dtype='M8[s]') + assert np.all(np.isnat(z[:])) + z = full(100, chunks=10, fill_value='NaT', dtype='m8[s]') + assert np.all(np.isnat(z[:])) # byte string dtype v = b'xxx' From d3aaf4e95628a9e34ac8403a2149b4b7e475eb08 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 7 Dec 2018 14:09:56 -0500 Subject: [PATCH 7/7] Drop extra whitespace Should fix the flake8 error seen on CI. ref: https://travis-ci.org/zarr-developers/zarr/jobs/465075634 --- zarr/tests/test_creation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py index 57ebc44980..4c2af854fb 100644 --- a/zarr/tests/test_creation.py +++ b/zarr/tests/test_creation.py @@ -134,7 +134,7 @@ def test_full(): # nan z = full(100, chunks=10, fill_value=np.nan, dtype='f8') assert np.all(np.isnan(z[:])) - + # NaT z = full(100, chunks=10, fill_value='NaT', dtype='M8[s]') assert np.all(np.isnat(z[:]))