From 7eed366397a4c8bf98f8bb4adac34fa022ac4532 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 30 Nov 2018 12:18:30 -0500 Subject: [PATCH 01/21] Bump Numcodecs requirement to 0.6.1 --- requirements_dev.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index 2ad18f372c..d39ba9e9b8 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,3 +1,3 @@ asciitree==0.3.3 fasteners==0.14.1 -numcodecs==0.5.5 +numcodecs==0.6.1 diff --git a/setup.py b/setup.py index a5e8334e43..903af3bc04 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ 'asciitree', 'numpy>=1.7', 'fasteners', - 'numcodecs>=0.5.3', + 'numcodecs>=0.6.1', ], package_dir={'': '.'}, packages=['zarr', 'zarr.tests'], From 2552f620191cafa72429566f4a8ce4f49b4db4d3 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 30 Nov 2018 13:06:29 -0500 Subject: [PATCH 02/21] Assert MsgPack round-trips bytes objects correctly Previously MsgPack was turning bytes objects to unicode objects when round-tripping them. However this has been fixed in the latest version of Numcodecs. So correct this test now that MsgPack is working correctly. 
--- zarr/tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 11891f8fe9..544ec95c41 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -982,7 +982,7 @@ def test_object_arrays(self): z[0] = 'foo' assert z[0] == 'foo' z[1] = b'bar' - assert z[1] == 'bar' # msgpack gets this wrong + assert z[1] == b'bar' z[2] = 1 assert z[2] == 1 z[3] = [2, 4, 6, 'baz'] From aee5aceced5e5a3f2698f2363540f064c200f4a9 Mon Sep 17 00:00:00 2001 From: Alistair Miles Date: Sat, 1 Dec 2018 14:09:40 +0000 Subject: [PATCH 03/21] properly guard against removal of object codec --- zarr/core.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index b4da45cd99..bcae03cb9f 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -8,6 +8,7 @@ import numpy as np +from numcodecs.compat import ensure_contiguous_ndarray from zarr.util import (is_total_slice, human_readable_size, normalize_resize_args, @@ -1743,18 +1744,25 @@ def _decode_chunk(self, cdata): for f in self._filters[::-1]: chunk = f.decode(chunk) - # view as correct dtype + # view as numpy array with correct dtype if self._dtype == object: - if isinstance(chunk, np.ndarray): - chunk = chunk.astype(self._dtype) + # special case object dtype, because incorrect handling can lead to + # segfaults and other bad things happening + if isinstance(chunk, np.ndarray) and chunk.dtype == object: + # chunk is already of correct dtype, good to carry on + # flatten just to be sure we can reshape later + chunk = chunk.reshape(-1, order='A') else: + # If we end up here, someone must have hacked around with the filters. + # We cannot deal with object arrays unless there is an object + # codec in the filter chain, i.e., a filter that converts from object + # array to something else during encoding, and converts back to object + # array during decoding. 
raise RuntimeError('cannot read object array without object codec') - elif isinstance(chunk, np.ndarray): - chunk = chunk.view(self._dtype) else: - chunk = np.frombuffer(chunk, dtype=self._dtype) + chunk = ensure_contiguous_ndarray(chunk).view(self._dtype) - # reshape + # ensure correct chunk shape chunk = chunk.reshape(self._chunks, order=self._order) return chunk From bf4eee8cc763b1917e299fcfde04a5e5d9a0938b Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 1 Dec 2018 13:21:24 -0500 Subject: [PATCH 04/21] Ensure `chunk` in `_decode_chunk` is an `ndarray` --- zarr/core.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index bcae03cb9f..94bd94edde 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -8,7 +8,7 @@ import numpy as np -from numcodecs.compat import ensure_contiguous_ndarray +from numcodecs.compat import ensure_ndarray, ensure_contiguous_ndarray from zarr.util import (is_total_slice, human_readable_size, normalize_resize_args, @@ -1745,10 +1745,11 @@ def _decode_chunk(self, cdata): chunk = f.decode(chunk) # view as numpy array with correct dtype + chunk = ensure_ndarray(chunk) if self._dtype == object: # special case object dtype, because incorrect handling can lead to # segfaults and other bad things happening - if isinstance(chunk, np.ndarray) and chunk.dtype == object: + if chunk.dtype == object: # chunk is already of correct dtype, good to carry on # flatten just to be sure we can reshape later chunk = chunk.reshape(-1, order='A') From b741fe12a0099cdcc0697a80b3ace31c82738cce Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 1 Dec 2018 15:47:44 -0500 Subject: [PATCH 05/21] Reshape `chunk` ourselves since it is an `ndarray` As we already ensured the `chunk` is an `ndarray` viewing the original data, there is no need for us to do that here as well. Plus the checks performed by `ensure_contiguous_ndarray` are not needed for our use case here. 
Particularly as we have already handled the unusual type cases above. We also don't need to constrain the buffer size. As such the only thing we really need is to flatten the array and make it contiguous, which is what we handle here directly. --- zarr/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 94bd94edde..b5d0185faf 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -8,7 +8,7 @@ import numpy as np -from numcodecs.compat import ensure_ndarray, ensure_contiguous_ndarray +from numcodecs.compat import ensure_ndarray from zarr.util import (is_total_slice, human_readable_size, normalize_resize_args, @@ -1761,7 +1761,7 @@ def _decode_chunk(self, cdata): # array during decoding. raise RuntimeError('cannot read object array without object codec') else: - chunk = ensure_contiguous_ndarray(chunk).view(self._dtype) + chunk = chunk.reshape(-1, order='A').view(self._dtype) # ensure correct chunk shape chunk = chunk.reshape(self._chunks, order=self._order) From f3144ae6b4fdc929eb1390b1ed87ee5a35e6862f Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 1 Dec 2018 15:47:50 -0500 Subject: [PATCH 06/21] Refactor `reshape` from `_decode_chunk` As both the expected `object` case and the non-`object` case perform a `reshape` to flatten the data, go ahead and refactor that out of both cases and handle it generally. Simplifies the code a bit. 
--- zarr/core.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index b5d0185faf..ab5de14512 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1749,11 +1749,7 @@ def _decode_chunk(self, cdata): if self._dtype == object: # special case object dtype, because incorrect handling can lead to # segfaults and other bad things happening - if chunk.dtype == object: # chunk is already of correct dtype, good to carry on # flatten just to be sure we can reshape later - chunk = chunk.reshape(-1, order='A') - else: + if chunk.dtype != object: # If we end up here, someone must have hacked around with the filters. # We cannot deal with object arrays unless there is an object # codec in the filter chain, i.e., a filter that converts from object # array to something else during encoding, and converts back to object # array during decoding. raise RuntimeError('cannot read object array without object codec') else: - chunk = chunk.reshape(-1, order='A').view(self._dtype) + chunk = chunk.view(self._dtype) # ensure correct chunk shape + chunk = chunk.reshape(-1, order='A') chunk = chunk.reshape(self._chunks, order=self._order) return chunk From 3e3920af230e059e84f70563c4f215d60f845aed Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 1 Dec 2018 15:47:53 -0500 Subject: [PATCH 07/21] Consolidate type checks in `_decode_chunk` As refactoring of the `reshape` step has effectively dropped the expected `object` type case, the checks for different types are a little more complicated than needed. To fix this, basically invert and swap the case ordering. This way we can handle all generally expected types first and simply cast them. Then we can raise if an `object` type shows up and is unexpected. 
--- zarr/core.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index ab5de14512..a2a07a29ba 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1746,18 +1746,17 @@ def _decode_chunk(self, cdata): # view as numpy array with correct dtype chunk = ensure_ndarray(chunk) - if self._dtype == object: - # special case object dtype, because incorrect handling can lead to - # segfaults and other bad things happening - if chunk.dtype != object: - # If we end up here, someone must have hacked around with the filters. - # We cannot deal with object arrays unless there is an object - # codec in the filter chain, i.e., a filter that converts from object - # array to something else during encoding, and converts back to object - # array during decoding. - raise RuntimeError('cannot read object array without object codec') - else: + # special case object dtype, because incorrect handling can lead to + # segfaults and other bad things happening + if self._dtype != object: chunk = chunk.view(self._dtype) + elif chunk.dtype != object: + # If we end up here, someone must have hacked around with the filters. + # We cannot deal with object arrays unless there is an object + # codec in the filter chain, i.e., a filter that converts from object + # array to something else during encoding, and converts back to object + # array during decoding. + raise RuntimeError('cannot read object array without object codec') # ensure correct chunk shape chunk = chunk.reshape(-1, order='A') From a61842bdeeaf7988c1e1886b1beb4c461d829c90 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 1 Dec 2018 20:06:36 -0500 Subject: [PATCH 08/21] Ensure `DictStore` uses `bytes` to store blobs The `DictStore` is pretty reliant on the fact that values are immutable and can be easily compared. For example `__eq__` assumes that all contents can be compared easily. This works fine if the data is `bytes`. 
However it doesn't really work for `ndarray`s for example. Previously we would have stored whatever the user gave us here. This means comparisons could fall down in those cases as well (much as the example in the tutorial has highlighted on CI). Now we effectively require that the data be something that can either be coerced to `bytes` (e.g. via the new/old buffer protocol) or is `bytes` to begin with. Make sure not to force this requirement when nesting one `MutableMapping` within another. --- zarr/storage.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/zarr/storage.py b/zarr/storage.py index 6720b42d12..3bfc2e9919 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -40,6 +40,7 @@ from zarr.meta import encode_array_metadata, encode_group_metadata from zarr.compat import PY2, binary_type, OrderedDict_move_to_end from numcodecs.registry import codec_registry +from numcodecs.compat import ensure_bytes from zarr.errors import (err_contains_group, err_contains_array, err_bad_compressor, err_fspath_exists_notdir, err_read_only, MetadataError) @@ -554,6 +555,8 @@ def __getitem__(self, item): def __setitem__(self, item, value): with self.write_mutex: parent, key = self._require_parent(item) + if not isinstance(value, self.cls): + value = ensure_bytes(value) parent[key] = value def __delitem__(self, item): From 0e05be0b2a813fdd75a2418916c3c733a11a0392 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 1 Dec 2018 20:11:03 -0500 Subject: [PATCH 09/21] Drop `test_getsize_ext` This test case seems to be ill-posed. Anytime we store `object`s to `Array`s we require an `object_codec` to be specified. Otherwise we have no clean way to serialize the data. However this `DictStore` test breaks that assumption by explicitly storing an `object` type in it even though this would never work for the other stores (particularly when working with `Array`s). This includes in-memory Zarr `Array`s, which would be backed by `DictStore`. 
Given this, we go ahead and drop this test case. --- zarr/tests/test_storage.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 33c65f36c9..58c079b965 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -633,14 +633,6 @@ def test_setdel(self): store = self.create_store() setdel_hierarchy_checks(store) - def test_getsize_ext(self): - store = self.create_store() - store['a'] = list(range(10)) - store['b/c'] = list(range(10)) - assert -1 == store.getsize() - assert -1 == store.getsize('a') - assert -1 == store.getsize('b') - class TestDirectoryStore(StoreTests, unittest.TestCase): From bbf783eaa31f73c9d00e547ca34980ab48e49d18 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 1 Dec 2018 20:39:36 -0500 Subject: [PATCH 10/21] Change default store to `DictStore` Instead of using a Python `dict` as the `default` store for a Zarr `Array`, use the `DictStore`. This ensures that all blobs will be represented as `bytes` regardless of what the user provided as data. Thus things like comparisons of stores will work well in the default case. 
--- zarr/creation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/creation.py b/zarr/creation.py index 0184a4a5da..b46adc5b38 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -7,7 +7,7 @@ from zarr.core import Array -from zarr.storage import (DirectoryStore, init_array, contains_array, contains_group, +from zarr.storage import (DictStore, DirectoryStore, init_array, contains_array, contains_group, default_compressor, normalize_storage_path, ZipStore) from numcodecs.registry import codec_registry from zarr.errors import err_contains_array, err_contains_group, err_array_not_found @@ -125,7 +125,7 @@ def create(shape, chunks=True, dtype=None, compressor='default', return z -def normalize_store_arg(store, clobber=False, default=dict): +def normalize_store_arg(store, clobber=False, default=DictStore): if store is None: return default() elif isinstance(store, str): From b56c2dda0eb17436502c2dc92f93e5fd910d79a6 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 1 Dec 2018 20:52:16 -0500 Subject: [PATCH 11/21] Update `DictStore` docs to note `Array` uses it --- zarr/storage.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 3bfc2e9919..bac4ce7c7a 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -485,12 +485,11 @@ class DictStore(MutableMapping): >>> type(g.store) - Note that the default class when creating an array is the built-in - :class:`dict` class, i.e.:: + Also this is the default class when creating an array. E.g.:: >>> z = zarr.zeros(100) >>> type(z.store) - + Notes ----- From cf781adf70fffcd5fa8b0e32de224dca6f1ec54f Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 1 Dec 2018 22:34:52 -0500 Subject: [PATCH 12/21] Update `Array`'s `info` examples As we are now using `DictStore` to back the `Array`, we can correctly measure how much memory it is using. So update the examples in `info` and the tutorial to show how much memory is being used. 
Also update the store type listed in info as well. --- docs/tutorial.rst | 8 ++++---- zarr/core.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 606b5acef5..fe65145cb0 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -176,7 +176,7 @@ print some diagnostics, e.g.:: Read-only : False Compressor : Blosc(cname='zstd', clevel=3, shuffle=BITSHUFFLE, : blocksize=0) - Store type : builtins.dict + Store type : zarr.storage.DictStore No. bytes : 400000000 (381.5M) No. bytes stored : 3242241 (3.1M) Storage ratio : 123.4 @@ -268,7 +268,7 @@ Here is an example using a delta filter with the Blosc compressor:: Read-only : False Filter [0] : Delta(dtype=' Date: Sat, 1 Dec 2018 22:40:57 -0500 Subject: [PATCH 13/21] Drop `ensure_bytes` definition from `zarr.storage` As Numcodecs now includes a very versatile and effective `ensure_bytes` function, there is no need to define our own in `zarr.storage` as well. So go ahead and drop it. 
--- zarr/storage.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index bac4ce7c7a..7a6639af53 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -445,23 +445,6 @@ def _init_group_metadata(store, overwrite=False, path=None, chunk_store=None): store[key] = encode_group_metadata(meta) -def ensure_bytes(s): - if isinstance(s, binary_type): - return s - if isinstance(s, np.ndarray): - if PY2: # pragma: py3 no cover - # noinspection PyArgumentList - return s.tostring(order='A') - else: # pragma: py2 no cover - # noinspection PyArgumentList - return s.tobytes(order='A') - if hasattr(s, 'tobytes'): - return s.tobytes() - if PY2 and hasattr(s, 'tostring'): # pragma: py3 no cover - return s.tostring() - return memoryview(s).tobytes() - - def _dict_store_keys(d, prefix='', cls=dict): for k in d.keys(): v = d[k] From cb14850ed0d1eeac8aeb7e40f48082dbfcd27a66 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 1 Dec 2018 22:48:37 -0500 Subject: [PATCH 14/21] Drop import of `binary_type` in `zarr.storage` As this is no longer being used by `ensure_bytes` as that function was dropped, go ahead and drop `binary_type` as well. 
--- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 7a6639af53..9178e4a6b5 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -38,7 +38,7 @@ normalize_storage_path, buffer_size, normalize_fill_value, nolock, normalize_dtype) from zarr.meta import encode_array_metadata, encode_group_metadata -from zarr.compat import PY2, binary_type, OrderedDict_move_to_end +from zarr.compat import PY2, OrderedDict_move_to_end from numcodecs.registry import codec_registry from numcodecs.compat import ensure_bytes from zarr.errors import (err_contains_group, err_contains_array, err_bad_compressor, From d299b1be50fde8ddaecefc3b04a8eaf7c8dcef6c Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 1 Dec 2018 23:07:59 -0500 Subject: [PATCH 15/21] Take flattened array views to avoid some copies Make use of Numcodecs' `ensure_ndarray` to take `ndarray` views onto buffers to be stored in a few cases so as to reshape them and avoid a copy (thanks to the buffer protocol). 
--- zarr/storage.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 9178e4a6b5..2fb7ad5433 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -31,16 +31,13 @@ import warnings -import numpy as np - - from zarr.util import (normalize_shape, normalize_chunks, normalize_order, normalize_storage_path, buffer_size, normalize_fill_value, nolock, normalize_dtype) from zarr.meta import encode_array_metadata, encode_group_metadata from zarr.compat import PY2, OrderedDict_move_to_end from numcodecs.registry import codec_registry -from numcodecs.compat import ensure_bytes +from numcodecs.compat import ensure_bytes, ensure_ndarray from zarr.errors import (err_contains_group, err_contains_array, err_bad_compressor, err_fspath_exists_notdir, err_read_only, MetadataError) @@ -726,9 +723,8 @@ def __getitem__(self, key): def __setitem__(self, key, value): - # handle F-contiguous numpy arrays - if isinstance(value, np.ndarray) and value.flags.f_contiguous: - value = ensure_bytes(value) + # coerce to flat, contiguous array (ideally without copying) + value = ensure_ndarray(value).reshape(-1, order='A') # destination path for key file_path = os.path.join(self.path, key) @@ -1177,7 +1173,7 @@ def __getitem__(self, key): def __setitem__(self, key, value): if self.mode == 'r': err_read_only() - value = ensure_bytes(value) + value = ensure_ndarray(value).reshape(-1, order='A') with self.mutex: self.zf.writestr(key, value) From 205fa163597fa1162b17865f89a6593c703e9602 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 1 Dec 2018 23:32:01 -0500 Subject: [PATCH 16/21] Simplify `buffer_size` by using `ensure_ndarray` Rewrite `buffer_size` to just use Numcodecs' `ensure_ndarray` to get an `ndarray` that views the data. Once the `ndarray` is gotten, all that is needed is to access its `nbytes` member, which returns the number of bytes that it takes up. 
--- zarr/util.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/zarr/util.py b/zarr/util.py index b79865bfe8..ad882c41d5 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division -import operator from textwrap import TextWrapper, dedent import numbers import uuid @@ -10,10 +9,11 @@ from asciitree import BoxStyle, LeftAligned from asciitree.traversal import Traversal import numpy as np +from numcodecs.compat import ensure_ndarray from numcodecs.registry import codec_registry -from zarr.compat import PY2, reduce, text_type, binary_type +from zarr.compat import PY2, text_type, binary_type # codecs to use for object dtype convenience API @@ -314,17 +314,7 @@ def normalize_storage_path(path): def buffer_size(v): - from array import array as _stdlib_array - if PY2 and isinstance(v, _stdlib_array): # pragma: py3 no cover - # special case array.array because does not support buffer - # interface in PY2 - return v.buffer_info()[1] * v.itemsize - else: # pragma: py2 no cover - v = memoryview(v) - if v.shape: - return reduce(operator.mul, v.shape) * v.itemsize - else: - return v.itemsize + return ensure_ndarray(v).nbytes def info_text_report(items): From f6880b907d2da5334b0eaf43d79462bd1e36f4db Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 1 Dec 2018 23:32:31 -0500 Subject: [PATCH 17/21] Test `getsize` for unknown size --- zarr/tests/test_storage.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 58c079b965..ba6164da55 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1088,6 +1088,10 @@ def test_getsize(): assert 7 == getsize(store) assert 5 == getsize(store, 'baz') + store = dict() + store['boo'] = None + assert -1 == getsize(store) + def test_migrate_1to2(): from zarr import meta_v1 From bcee828b7275a5c615124f2b196fcebfa46340fb Mon Sep 17 
00:00:00 2001 From: John Kirkham Date: Sat, 1 Dec 2018 23:42:56 -0500 Subject: [PATCH 18/21] Simplify `ensure_str` in `zarr.meta` If the data is already a `str` instance, turn `ensure_str` into a no-op. For all other cases, make use of Numcodecs' `ensure_bytes` to aid `ensure_str` in coercing data through the buffer protocol. If we are on Python 3, then decode the `bytes` object to a `str`. --- zarr/meta.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/zarr/meta.py b/zarr/meta.py index 9ce580eff2..7984efb701 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -5,9 +5,10 @@ import numpy as np +from numcodecs.compat import ensure_bytes -from zarr.compat import PY2, binary_type, Mapping +from zarr.compat import PY2, Mapping from zarr.errors import MetadataError @@ -15,14 +16,9 @@ def ensure_str(s): - if PY2: # pragma: py3 no cover - # noinspection PyUnresolvedReferences - if isinstance(s, buffer): # noqa - s = str(s) - else: # pragma: py2 no cover - if isinstance(s, memoryview): - s = s.tobytes() - if isinstance(s, binary_type): + if not isinstance(s, str): + s = ensure_bytes(s) + if not PY2: # pragma: py2 no cover s = s.decode('ascii') return s From dfa51f8d8e85f1230f8146f04b84ab74ed83e6e9 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sat, 1 Dec 2018 23:55:30 -0500 Subject: [PATCH 19/21] Drop unknown size cases from `DictStore` As `DictStore` now must only store `bytes` or types coercible to bytes via the buffer protocol, there is no possibility for it to have unknown sizes as `bytes` always have a known size. So drop these cases where the size can be `-1`. 
--- zarr/storage.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 2fb7ad5433..6e57315377 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -634,17 +634,11 @@ def getsize(self, path=None): size = 0 for v in value.values(): if not isinstance(v, self.cls): - try: - size += buffer_size(v) - except TypeError: - return -1 + size += buffer_size(v) return size else: - try: - return buffer_size(value) - except TypeError: - return -1 + return buffer_size(value) def clear(self): with self.write_mutex: From d0b80126348c0d447e3662d4806b820853e30db9 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sun, 2 Dec 2018 00:29:17 -0500 Subject: [PATCH 20/21] Cast datetime/timedelta arrays for buffer protocol Make sure that datetime/timedelta arrays are cast to a type that supports the buffer protocol. Ensure this is a type that can handle all of the datetime/timedelta values and has the same itemsize. --- zarr/storage.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/zarr/storage.py b/zarr/storage.py index 6e57315377..228e9cccb5 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -719,6 +719,8 @@ def __setitem__(self, key, value): # coerce to flat, contiguous array (ideally without copying) value = ensure_ndarray(value).reshape(-1, order='A') + if value.dtype.kind in 'mM': + value = value.view('i8') # destination path for key file_path = os.path.join(self.path, key) @@ -1168,6 +1170,8 @@ def __setitem__(self, key, value): if self.mode == 'r': err_read_only() value = ensure_ndarray(value).reshape(-1, order='A') + if value.dtype.kind in 'mM': + value = value.view('i8') with self.mutex: self.zf.writestr(key, value) From 0cf5e5b7ee9fbaf757869b73b566bbeb06cd7e7b Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sun, 2 Dec 2018 00:33:41 -0500 Subject: [PATCH 21/21] Use `ensure_contiguous_ndarray` with stores Instead of using `ensure_ndarray`, use `ensure_contiguous_ndarray` with the stores. 
This ensures that datetime/timedeltas are handled by default. Also catches things like object arrays. Finally this handles flattening the array if needed. --- zarr/storage.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 228e9cccb5..e013924d91 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -37,7 +37,7 @@ from zarr.meta import encode_array_metadata, encode_group_metadata from zarr.compat import PY2, OrderedDict_move_to_end from numcodecs.registry import codec_registry -from numcodecs.compat import ensure_bytes, ensure_ndarray +from numcodecs.compat import ensure_bytes, ensure_contiguous_ndarray from zarr.errors import (err_contains_group, err_contains_array, err_bad_compressor, err_fspath_exists_notdir, err_read_only, MetadataError) @@ -718,9 +718,7 @@ def __getitem__(self, key): def __setitem__(self, key, value): # coerce to flat, contiguous array (ideally without copying) - value = ensure_ndarray(value).reshape(-1, order='A') - if value.dtype.kind in 'mM': - value = value.view('i8') + value = ensure_contiguous_ndarray(value) # destination path for key file_path = os.path.join(self.path, key) @@ -1169,9 +1167,7 @@ def __getitem__(self, key): def __setitem__(self, key, value): if self.mode == 'r': err_read_only() - value = ensure_ndarray(value).reshape(-1, order='A') - if value.dtype.kind in 'mM': - value = value.view('i8') + value = ensure_contiguous_ndarray(value) with self.mutex: self.zf.writestr(key, value)