Skip to content
5 changes: 5 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ Bug fixes
* Always use a ``tuple`` when indexing a NumPy ``ndarray``.
By :user:`John Kirkham <jakirkham>`, :issue:`376`

* Ensure when ``Array`` uses a ``dict``-based chunk store that it only contains
``bytes`` to facilitate comparisons and protect against writes. Drop the copy
for the no filter/compressor case as this handles that case.
By :user:`John Kirkham <jakirkham>`, :issue:`359`

Maintenance
~~~~~~~~~~~

Expand Down
24 changes: 9 additions & 15 deletions zarr/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


import numpy as np
from numcodecs.compat import ensure_ndarray
from numcodecs.compat import ensure_bytes, ensure_ndarray


from zarr.util import (is_total_slice, human_readable_size, normalize_resize_args,
Expand Down Expand Up @@ -1677,21 +1677,11 @@ def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=Non

else:

if not self._compressor and not self._filters:

# https://github.com/alimanfoo/zarr/issues/79
# Ensure a copy is taken so we don't end up storing
# a view into someone else's array.
# N.B., this assumes that filters or compressor always
# take a copy and never attempt to apply encoding in-place.
chunk = np.array(value, dtype=self._dtype, order=self._order)

# ensure array is contiguous
if self._order == 'F':
chunk = np.asfortranarray(value, dtype=self._dtype)
else:
# ensure array is contiguous
if self._order == 'F':
chunk = np.asfortranarray(value, dtype=self._dtype)
else:
chunk = np.ascontiguousarray(value, dtype=self._dtype)
chunk = np.ascontiguousarray(value, dtype=self._dtype)

else:
# partially replace the contents of this chunk
Expand Down Expand Up @@ -1788,6 +1778,10 @@ def _encode_chunk(self, chunk):
else:
cdata = chunk

# ensure in-memory data is immutable and easy to compare
if isinstance(self.chunk_store, dict):
cdata = ensure_bytes(cdata)

return cdata

def __repr__(self):
Expand Down
38 changes: 37 additions & 1 deletion zarr/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from zarr.util import buffer_size
from numcodecs import (Delta, FixedScaleOffset, Zlib, Blosc, BZ2, MsgPack, Pickle,
Categorize, JSON, VLenUTF8, VLenBytes, VLenArray)
from numcodecs.compat import ensure_bytes, ensure_ndarray
from numcodecs.tests.common import greetings


Expand Down Expand Up @@ -83,6 +84,31 @@ def create_array(self, read_only=False, **kwargs):
return Array(store, read_only=read_only, cache_metadata=cache_metadata,
cache_attrs=cache_attrs)

def test_store_has_binary_values(self):
# Initialize array
np.random.seed(42)
z = self.create_array(shape=(1050,), chunks=100, dtype='f8', compressor=[])
z[:] = np.random.random(z.shape)

for v in z.chunk_store.values():
try:
ensure_ndarray(v)
except TypeError: # pragma: no cover
pytest.fail("Non-bytes-like value: %s" % repr(v))

def test_store_has_bytes_values(self):
# Test that many stores do hold bytes values.
# Though this is not a strict requirement.
# Should be disabled by any stores that fail this as needed.

# Initialize array
np.random.seed(42)
z = self.create_array(shape=(1050,), chunks=100, dtype='f8', compressor=[])
z[:] = np.random.random(z.shape)

# Check in-memory array only contains `bytes`
assert all([isinstance(v, binary_type) for v in z.chunk_store.values()])

def test_nbytes_stored(self):

# dict as store
Expand Down Expand Up @@ -1401,6 +1427,9 @@ def create_array(read_only=False, **kwargs):
return Array(store, read_only=read_only, cache_metadata=cache_metadata,
cache_attrs=cache_attrs)

def test_store_has_bytes_values(self):
pass # returns values as memoryviews/buffers instead of bytes

def test_nbytes_stored(self):
pass # not implemented

Expand Down Expand Up @@ -1725,6 +1754,9 @@ def __init__(self):
def keys(self):
return self.inner.keys()

def values(self):
return self.inner.values()

def get(self, item, default=None):
try:
return self.inner[item]
Expand All @@ -1735,7 +1767,7 @@ def __getitem__(self, item):
return self.inner[item]

def __setitem__(self, item, value):
self.inner[item] = value
self.inner[item] = ensure_bytes(value)

def __delitem__(self, key):
del self.inner[key]
Expand Down Expand Up @@ -1846,3 +1878,7 @@ def create_array(read_only=False, **kwargs):
init_array(store, **kwargs)
return Array(store, read_only=read_only, cache_metadata=cache_metadata,
cache_attrs=cache_attrs)

def test_store_has_bytes_values(self):
# skip as the cache has no control over how the store provides values
pass